Bender et al (2024) – EMBO

scRNA-seq analysis

Setup

Define the root directory that contains the folder with the source data. Then run the startup script, which loads the packages and defines document-specific variables.

# The directory holding all source data is mounted into the Docker container
# as "/projectdir/"; the startup script lives directly inside it.
rootdir <- "/projectdir/"
source(file.path(rootdir, "runStartup.R"))

QC

# Per-cell annotation (one row per barcode). Derives `genotype` and
# `replicate` from the `sample` column by splitting at the underscore
# (text before "_" -> genotype, text after "_" -> replicate).
barcodes <- read.delim(paste0(rootdir, "/source_data/GSE250629_scrnaseq_coldata_unfiltered.tsv.gz"), header = TRUE) %>%
  magrittr::set_rownames(paste(.$barcode, .$sample, sep = "_")) %>%
  dplyr::mutate(
    sample = factor(sample),
    genotype = factor(gsub("_.*", "", sample), levels = c("WT", "URE")),
    replicate = factor(gsub(".*_", "", sample))
  ) %>%
  # NOTE(review): no grouped operation follows this group_by() before the
  # conversion to DataFrame, so it looks redundant -- confirm before removing.
  dplyr::group_by(sample) %>%
  DataFrame() %>%
  # The final row names are taken from the `cell` column, overriding the
  # barcode_sample names set above.
  # NOTE(review): assumes the coldata file has a `cell` column -- TODO confirm.
  `rownames<-`(.$cell)

# Per-gene annotation; row names come from the `gene` column.
features <- read.delim(paste0(rootdir, "/source_data/GSE250629_scrnaseq_rowdata_unfiltered.tsv.gz"), header = TRUE) %>%
  `rownames<-`(.$gene)

# Raw count matrix in MatrixMarket format, converted to a column-compressed
# sparse matrix.
# NOTE(review): the variable name `matrix` shadows base::matrix until the rm()
# below.
matrix <- Matrix::readMM(paste0(rootdir, "/source_data/GSE250629_scrnaseq_rawCounts_unfiltered.mtx.gz")) %>%
  as(., "CsparseMatrix")

# Dimnames are assigned by position.
# NOTE(review): presumably the row/column order of the matrix matches the
# order of the annotation tables -- verify against the data deposit.
rownames(matrix) <- rownames(features)
colnames(matrix) <- rownames(barcodes)

# Assemble the SingleCellExperiment and drop the intermediate objects.
sce <- SingleCellExperiment(assays = list(counts = matrix), colData = barcodes, rowData = features)
rm(barcodes, features, matrix)

# Per-cell QC metrics
# Disable scientific notation while QC numbers are computed and plotted.
options(scipen = 999)

# Feature subsets for which separate QC metrics are computed: genes annotated
# as rRNA, and genes whose name starts with "mt-".
qc_subsets <- list(
  rRNA = rowData(sce)$gene_type == "rRNA",
  mito = grepl("^mt-", rowData(sce)$gene_name)
)

# Append the metrics as additional colData columns.
colData(sce) <- cbind(
  colData(sce),
  scuttle::perCellQCMetrics(x = sce, subsets = qc_subsets)
)
rm(qc_subsets)

# Calculate cutoffs based on MAD
# Each metric is tested per sample (batch = sample). Lower-tail tests are run
# on the log scale, upper-tail tests on the raw scale. The object names
# (outl.<metric>[_bottom|_top]) are relied upon by the threshold summary below.
coldat <- data.frame(colData(sce))

suppressWarnings({
  # Number of detected genes: 3 MADs in either direction
  outl.detected_bottom <- isOutlier(
    coldat$detected,
    nmads = 3, type = "lower", log = TRUE, batch = coldat$sample
  )
  outl.detected_top <- isOutlier(
    coldat$detected,
    nmads = 3, type = "higher", log = FALSE, batch = coldat$sample
  )

  # Total UMI count: 2 MADs in either direction
  outl.total_bottom <- isOutlier(
    coldat$total,
    nmads = 2, type = "lower", log = TRUE, batch = coldat$sample
  )
  outl.total_top <- isOutlier(
    coldat$total,
    nmads = 2, type = "higher", log = FALSE, batch = coldat$sample
  )

  # Percentage of counts in mitochondrial / rRNA genes: upper tail only
  outl.mito <- isOutlier(
    coldat$subsets_mito_percent,
    nmads = 3, type = "higher", log = FALSE, batch = coldat$sample
  )
  outl.rRNA <- isOutlier(
    coldat$subsets_rRNA_percent,
    nmads = 3, type = "higher", log = FALSE, batch = coldat$sample
  )
})

# Summarize thresholds
# Collect the names of the outlier vectors created above. The pattern is an
# explicit alternation with an escaped dot; the previous
# "^outl.[rrna,mito,total,detected]" was a character class that only matched
# the intended objects by coincidence of their first letter.
outl <- ls(pattern = "^outl\\.(rRNA|mito|total|detected)")

# One row per filter and direction, one column per sample, holding the
# threshold stored by isOutlier() in the "thresholds" attribute.
qc_thresholds <- do.call(rbind, lapply(outl, function(x) {
  atr <- attr(get(x), "thresholds")

  # "<metric>[_bottom|_top]_<lower|higher>"
  nm <- paste(gsub("1", "", strsplit(x, "\\.")[[1]][2]), rownames(atr), sep = "_")
  tmp <- data.frame(Filter = nm, atr)

  # Drop the direction that was not tested (its thresholds are infinite)
  tmp <- tmp[!is.infinite(rowSums(tmp[, 2:ncol(tmp), drop = FALSE])), ]

  # "_lower" / "_higher" already encodes the direction
  tmp$Filter <- gsub("_bottom|_top", "", tmp$Filter)
  rownames(tmp) <- NULL
  tmp
}))

# A cell is retained only if none of the QC tests flags it as an outlier.
# Build a cells x tests logical matrix from the outlier vectors named in `outl`.
keep_qc <- do.call(cbind, lapply(outl, function(flag_name) as.logical(get(flag_name))))

# Cells with an undefined test result (NA) are treated as outliers.
keep_qc[is.na(keep_qc)] <- TRUE
keep_qc <- rowSums(keep_qc) == 0

sce <- sce[, keep_qc]

rm(outl)

# Plots
# Each QC metric is plotted per sample before (coldat) and after (coldat_f)
# filtering, with the same fixed y-axis range for both panels.
coldat_f <- data.frame(colData(sce))

# Temporary helper: one QC panel for `metric` with the y-axis fixed to [0, upper].
qc_panel <- function(cells, metric, label, upper) {
  plotQC(data = cells, x = "sample", y = metric, y.lab = label) + ylim(0, upper)
}

# Detected genes
qc_detected_genes_before <- qc_panel(coldat, "detected", "detected genes", 10000)
qc_detected_genes_after <- qc_panel(coldat_f, "detected", "detected genes", 10000)

# Total UMI counts
qc_total_before <- qc_panel(coldat, "total", "total UMI", 120000)
qc_total_after <- qc_panel(coldat_f, "total", "total UMI", 120000)

# Percentage of mitochondrial counts
qc_mito_before <- qc_panel(coldat, "subsets_mito_percent", "% mt reads", 100)
qc_mito_after <- qc_panel(coldat_f, "subsets_mito_percent", "% mt reads", 100)

# Percentage of rRNA counts
qc_rRNA_before <- qc_panel(coldat, "subsets_rRNA_percent", "% rRNA reads", 100)
qc_rRNA_after <- qc_panel(coldat_f, "subsets_rRNA_percent", "% rRNA reads", 100)

rm(coldat, coldat_f, qc_panel)

# Short aliases for the panels of Appendix Figure S1 (row 1 = before
# filtering, row 2 = after filtering). The optional titles are left
# commented out.
s1b1 <- qc_detected_genes_before # + ggtitle("Detected genes", "before filtering")
s1b2 <- qc_detected_genes_after # + ggtitle("Detected genes", "after filtering")
s1c1 <- qc_total_before # + ggtitle("total UMI counts", "before filtering")
s1c2 <- qc_total_after # + ggtitle("total UMI counts", "after filtering")
s1d1 <- qc_mito_before # + ggtitle("% Alignment to mt genes", "before filtering")
s1d2 <- qc_mito_after # + ggtitle("% Alignment to mt genes", "after filtering")
s1e1 <- qc_rRNA_before # + ggtitle("% Alignment to rRNA genes", "before filtering")
s1e2 <- qc_rRNA_after # + ggtitle("% Alignment to rRNA genes", "after filtering")

# Stack "before" on top of "after"; gg.noX is added to the top panel only.
Appendix_Figure_S1B <- (s1b1 + gg.noX) / s1b2
Appendix_Figure_S1B

Appendix_Figure_S1C <- (s1c1 + gg.noX) / s1c2
Appendix_Figure_S1C

Appendix_Figure_S1D <- (s1d1 + gg.noX) / s1d2
Appendix_Figure_S1D
#> Warning: Removed 72 rows containing non-finite values (stat_ydensity).
#> Warning: Removed 72 rows containing non-finite values (stat_boxplot).

Appendix_Figure_S1E <- (s1e1 + gg.noX) / s1e2
Appendix_Figure_S1E
#> Warning: Removed 72 rows containing non-finite values (stat_ydensity).

#> Warning: Removed 72 rows containing non-finite values (stat_boxplot).

# Remove the panel aliases
rm(list = ls(pattern = "^s1"))

# Restore scientific-notation handling (scipen was set to 999 for the QC section)
options(scipen = 0)

Mitigation of cell cycle effect

# Normalize per batch, then rescale
# Step 1: within each sample, pre-cluster the cells, compute deconvolution
#         size factors and log-normalized counts.
# Step 2: multiBatchNorm() rescales across samples.
sce <- bplapply(unique(sce$sample), BPPARAM = bpparam, function(x) {
  s <- sce[, sce$sample == x]

  set.seed(2020)
  q <- scran::quickCluster(x = s, assay.type = "counts", graph.fun = "louvain")

  csf <- scran::computeSumFactors(s, clusters = q)

  # Use the size factors that computeSumFactors() just stored in `csf`.
  # Previously `size_factors = s$sizeFactor` was passed, which reads from the
  # object *before* size-factor estimation: NULL on a fresh object (silently
  # falling back to the stored factors) or stale values on a re-run.
  scuttle::logNormCounts(csf, assay.type = "counts")
}) %>% do.call(SingleCellExperiment::cbind, .)

sce <- batchelor::multiBatchNorm(sce, batch = sce$sample)

# Top 1000 highly variable genes per genotype, with samples as blocking
# factor. Returns a list named by genotype level.
tophvg_exploration <- setNames(
  lapply(levels(sce$genotype), function(genotype_level) {
    cells_gt <- sce[, sce$genotype == genotype_level]

    set.seed(2020)
    gene_var <- scran::modelGeneVar(x = cells_gt, block = factor(cells_gt$sample))

    getTopHVGs(gene_var, n = 1000)
  }),
  levels(sce$genotype)
)

# Integration with WT as reference
# For every genotype level in turn, collect the positions of the matching
# samples within unique(sce$sample).
# NOTE(review): these positions follow order of appearance, whereas an integer
# merge.order is presumably interpreted relative to the batch (factor level)
# order used by fastMNN -- confirm that both orders agree for this data set.
integration_order <- unlist(lapply(levels(sce$genotype), grep, x = unique(sce$sample)))

# MNN correction on the union of the per-genotype HVGs; only the corrected
# low-dimensional coordinates are kept on `sce`.
reducedDim(sce, "corrected") <- reducedDim(
  batchelor::fastMNN(
    sce,
    batch = sce$sample,
    subset.row = unique(unlist(tophvg_exploration)),
    merge.order = integration_order
  ),
  "corrected"
)

# Clustering & UMAPs
# Shared-nearest-neighbour graph on the MNN-corrected coordinates
# (k = 20, Jaccard weighting).
set.seed(2020)
snngraph <- scran::buildSNNGraph(sce, use.dimred = "corrected", type = "jaccard", k = 20)

# Louvain community detection on the graph; membership is stored as a factor.
# The RNG is not re-seeded between graph construction and clustering, so the
# statement order here matters for reproducibility.
sce$cluster <- factor(igraph::cluster_louvain(snngraph)$membership)

rm(snngraph)

# UMAP on the corrected coordinates, single-threaded, with a fixed seed.
set.seed(2020)
sce <- scater::runUMAP(sce, dimred = "corrected", min_dist = 0.75, spread = 0.75, n_neighbors = 10, n_threads = 1)

# Manual assignment of cluster ids to cell-cycle groups.
is_G1 <- as.character(c(1, 10, 8, 6, 9, 13, 5))
is_no.G1 <- as.character(c(2, 4, 11, 7))
# these could be genotype-specific as very different cell numbers in cell_numbers
is_NA <- as.character(c(3, 12))

# Lookup table: one row per cluster with its group label. Clusters that are in
# none of the three sets get a missing value.
clusters_assigned <- data.frame(Cluster = levels(sce$cluster))
clusters_assigned$cc_group <- ifelse(
  clusters_assigned$Cluster %in% is_G1, "G1",
  ifelse(
    clusters_assigned$Cluster %in% is_no.G1, "non.G1",
    ifelse(clusters_assigned$Cluster %in% is_NA, "NA", NA_character_)
  )
)

# Transfer the group label to every cell via its cluster id.
cell_to_row <- match(sce$cluster, clusters_assigned$Cluster)
sce$cc_group <- factor(
  clusters_assigned$cc_group[cell_to_row],
  levels = c("G1", "non.G1", "NA")
)
rm(cell_to_row)

This is not in the paper, but is included here for documentation. Note the separation of the UMAP into two “islands”, left and right. Each island contains a cluster with high expression of bona fide markers of leukocyte (progenitor) populations. Mki67 is high in one island but not in the other. We interpret this as a separation by cell cycle phase, which is unwanted variation.

# UMAP colored by cluster, with cluster labels drawn on the plot instead of a legend
plot_umap(
  sce,
  by = "cluster",
  text_use_label = TRUE,
  label_size = 5
) +
  theme(legend.position = "none")

# One UMAP per gene, colored by expression of that gene
invisible(lapply(
  c("Ly6a", "Gata1", "Cebpe", "Ly86", "Mki67"),
  function(gene_symbol) print(plot_umap(sce, gene = gene_symbol))
))

Now identify the genes that are differentially expressed between the two islands. These are the confounding genes that will be removed from the HVGs.

# Only test genes detected in at least 25% of cells of any group
pexpr <- get_pexpr(data = assay(sce), group = sce$cc_group)
pexpr_above_25 <- rowMax(as.matrix(pexpr)) > 25

# Pairwise t-tests between the cell-cycle groups, blocked by genotype.
# NOTE(review): `lfc` is given as log(2) (natural log, ~0.69). If the test runs
# on log2-scale expression values, a two-fold threshold would be lfc = 1 --
# confirm the intended threshold. Kept unchanged so the gene list below stays
# reproducible.
pairwise_t <- scran::pairwiseTTests(
  x = sce[pexpr_above_25, ],
  groups = sce$cc_group,
  block = sce$genotype,
  lfc = log(2),
  direction = "any",
  BPPARAM = bpparam
)

# Position of the G1-vs-non.G1 comparison in the list of results. This is
# looked up directly rather than via row names after dplyr::filter(), because
# automatic row names are renumbered by filtering and would no longer point at
# the original position.
pairwise_t_idx <- which(
  pairwise_t$pairs$first == "G1" & pairwise_t$pairs$second == "non.G1"
)
stopifnot(length(pairwise_t_idx) == 1)

# Genes significant at 5% FDR; row names have the form "<gene_id>_<gene_name>".
confounding_genes <- as.data.frame(pairwise_t$statistics[[pairwise_t_idx]]) %>%
  tibble::rownames_to_column("Gene") %>%
  dplyr::filter(FDR < 0.05) %>%
  dplyr::pull(Gene) %>%
  data.frame(name = ., gene_id = gsub("_.*", "", .), gene_name = gsub(".*_", "", .))

knitr::kable(confounding_genes)
name gene_id gene_name
ENSMUSG00000026434_Nucks1 ENSMUSG00000026434 Nucks1
ENSMUSG00000026202_Tuba4a ENSMUSG00000026202 Tuba4a
ENSMUSG00000026605_Cenpf ENSMUSG00000026605 Cenpf
ENSMUSG00000027160_Ccdc34 ENSMUSG00000027160 Ccdc34
ENSMUSG00000040084_Bub1b ENSMUSG00000040084 Bub1b
ENSMUSG00000027326_Knl1 ENSMUSG00000027326 Knl1
ENSMUSG00000027306_Nusap1 ENSMUSG00000027306 Nusap1
ENSMUSG00000027469_Tpx2 ENSMUSG00000027469 Tpx2
ENSMUSG00000001403_Ube2c ENSMUSG00000001403 Ube2c
ENSMUSG00000036752_Tubb4b ENSMUSG00000036752 Tubb4b
ENSMUSG00000005233_Spc25 ENSMUSG00000005233 Spc25
ENSMUSG00000048327_Ckap2l ENSMUSG00000048327 Ckap2l
ENSMUSG00000034349_Smc4 ENSMUSG00000034349 Smc4
ENSMUSG00000015749_Anp32e ENSMUSG00000015749 Anp32e
ENSMUSG00000070392_Gm20634 ENSMUSG00000070392 Gm20634
ENSMUSG00000045328_Cenpe ENSMUSG00000045328 Cenpe
ENSMUSG00000037894_H2az1 ENSMUSG00000037894 H2az1
ENSMUSG00000027715_Ccna2 ENSMUSG00000027715 Ccna2
ENSMUSG00000028044_Cks1b ENSMUSG00000028044 Cks1b
ENSMUSG00000068855_H2ac20 ENSMUSG00000068855 H2ac20
ENSMUSG00000028312_Smc2 ENSMUSG00000028312 Smc2
ENSMUSG00000028832_Stmn1 ENSMUSG00000028832 Stmn1
ENSMUSG00000006398_Cdc20 ENSMUSG00000006398 Cdc20
ENSMUSG00000028873_Cdca8 ENSMUSG00000028873 Cdca8
ENSMUSG00000029177_Cenpa ENSMUSG00000029177 Cenpa
ENSMUSG00000025747_Tyms ENSMUSG00000025747 Tyms
ENSMUSG00000023505_Cdca3 ENSMUSG00000023505 Cdca3
ENSMUSG00000098318_Lockd ENSMUSG00000098318 Lockd
ENSMUSG00000030346_Rad51ap1 ENSMUSG00000030346 Rad51ap1
ENSMUSG00000038943_Prc1 ENSMUSG00000038943 Prc1
ENSMUSG00000030978_Rrm1 ENSMUSG00000030978 Rrm1
ENSMUSG00000031004_Mki67 ENSMUSG00000031004 Mki67
ENSMUSG00000054717_Hmgb2 ENSMUSG00000054717 Hmgb2
ENSMUSG00000049932_H2ax ENSMUSG00000049932 H2ax
ENSMUSG00000040204_Pclaf ENSMUSG00000040204 Pclaf
ENSMUSG00000036768_Kif15 ENSMUSG00000036768 Kif15
ENSMUSG00000074476_Spc24 ENSMUSG00000074476 Spc24
ENSMUSG00000019773_Fbxo5 ENSMUSG00000019773 Fbxo5
ENSMUSG00000075266_Cenpw ENSMUSG00000075266 Cenpw
ENSMUSG00000019942_Cdk1 ENSMUSG00000019942 Cdk1
ENSMUSG00000019961_Tmpo ENSMUSG00000019961 Tmpo
ENSMUSG00000020897_Aurkb ENSMUSG00000020897 Aurkb
ENSMUSG00000017716_Birc5 ENSMUSG00000017716 Birc5
ENSMUSG00000020330_Hmmr ENSMUSG00000020330 Hmmr
ENSMUSG00000020914_Top2a ENSMUSG00000020914 Top2a
ENSMUSG00000025574_Tk1 ENSMUSG00000025574 Tk1
ENSMUSG00000020649_Rrm2 ENSMUSG00000020649 Rrm2
ENSMUSG00000047534_Mis18bp1 ENSMUSG00000047534 Mis18bp1
ENSMUSG00000069300_H2bc11 ENSMUSG00000069300 H2bc11
ENSMUSG00000071478_H2ac7 ENSMUSG00000071478 H2ac7
ENSMUSG00000061482_H4c4 ENSMUSG00000061482 H4c4
ENSMUSG00000061615_H2ac4 ENSMUSG00000061615 H2ac4
ENSMUSG00000069267_H3c2 ENSMUSG00000069267 H3c2
ENSMUSG00000049539_H1f1 ENSMUSG00000049539 H1f1
ENSMUSG00000062248_Cks2 ENSMUSG00000062248 Cks2
ENSMUSG00000058773_H1f5 ENSMUSG00000058773 H1f5
ENSMUSG00000094777_Hist1h2ap ENSMUSG00000094777 Hist1h2ap
ENSMUSG00000060639_H4c9 ENSMUSG00000060639 H4c9
ENSMUSG00000069273_H3c6 ENSMUSG00000069273 H3c6
ENSMUSG00000069272_H2ac8 ENSMUSG00000069272 H2ac8
ENSMUSG00000051627_H1f4 ENSMUSG00000051627 H1f4
ENSMUSG00000069310_H3c3 ENSMUSG00000069310 H3c3
ENSMUSG00000006715_Gmnn ENSMUSG00000006715 Gmnn
ENSMUSG00000021377_Dek ENSMUSG00000021377 Dek
ENSMUSG00000022033_Pbk ENSMUSG00000022033 Pbk
ENSMUSG00000022034_Esco2 ENSMUSG00000022034 Esco2
ENSMUSG00000022360_Atad2 ENSMUSG00000022360 Atad2
ENSMUSG00000023004_Tuba1b ENSMUSG00000023004 Tuba1b
ENSMUSG00000007050_Lsm2 ENSMUSG00000007050 Lsm2
ENSMUSG00000001525_Tubb5 ENSMUSG00000001525 Tubb5
ENSMUSG00000024590_Lmnb1 ENSMUSG00000024590 Lmnb1
ENSMUSG00000024795_Kif20b ENSMUSG00000024795 Kif20b
ENSMUSG00000012443_Kif11 ENSMUSG00000012443 Kif11
ENSMUSG00000024660_Incenp ENSMUSG00000024660 Incenp
# Print the gene names as an R expression so the exact list is recorded in the
# rendered document.
dput(confounding_genes$name)
#> c("ENSMUSG00000026434_Nucks1", "ENSMUSG00000026202_Tuba4a", "ENSMUSG00000026605_Cenpf", 
#> "ENSMUSG00000027160_Ccdc34", "ENSMUSG00000040084_Bub1b", "ENSMUSG00000027326_Knl1", 
#> "ENSMUSG00000027306_Nusap1", "ENSMUSG00000027469_Tpx2", "ENSMUSG00000001403_Ube2c", 
#> "ENSMUSG00000036752_Tubb4b", "ENSMUSG00000005233_Spc25", "ENSMUSG00000048327_Ckap2l", 
#> "ENSMUSG00000034349_Smc4", "ENSMUSG00000015749_Anp32e", "ENSMUSG00000070392_Gm20634", 
#> "ENSMUSG00000045328_Cenpe", "ENSMUSG00000037894_H2az1", "ENSMUSG00000027715_Ccna2", 
#> "ENSMUSG00000028044_Cks1b", "ENSMUSG00000068855_H2ac20", "ENSMUSG00000028312_Smc2", 
#> "ENSMUSG00000028832_Stmn1", "ENSMUSG00000006398_Cdc20", "ENSMUSG00000028873_Cdca8", 
#> "ENSMUSG00000029177_Cenpa", "ENSMUSG00000025747_Tyms", "ENSMUSG00000023505_Cdca3", 
#> "ENSMUSG00000098318_Lockd", "ENSMUSG00000030346_Rad51ap1", "ENSMUSG00000038943_Prc1", 
#> "ENSMUSG00000030978_Rrm1", "ENSMUSG00000031004_Mki67", "ENSMUSG00000054717_Hmgb2", 
#> "ENSMUSG00000049932_H2ax", "ENSMUSG00000040204_Pclaf", "ENSMUSG00000036768_Kif15", 
#> "ENSMUSG00000074476_Spc24", "ENSMUSG00000019773_Fbxo5", "ENSMUSG00000075266_Cenpw", 
#> "ENSMUSG00000019942_Cdk1", "ENSMUSG00000019961_Tmpo", "ENSMUSG00000020897_Aurkb", 
#> "ENSMUSG00000017716_Birc5", "ENSMUSG00000020330_Hmmr", "ENSMUSG00000020914_Top2a", 
#> "ENSMUSG00000025574_Tk1", "ENSMUSG00000020649_Rrm2", "ENSMUSG00000047534_Mis18bp1", 
#> "ENSMUSG00000069300_H2bc11", "ENSMUSG00000071478_H2ac7", "ENSMUSG00000061482_H4c4", 
#> "ENSMUSG00000061615_H2ac4", "ENSMUSG00000069267_H3c2", "ENSMUSG00000049539_H1f1", 
#> "ENSMUSG00000062248_Cks2", "ENSMUSG00000058773_H1f5", "ENSMUSG00000094777_Hist1h2ap", 
#> "ENSMUSG00000060639_H4c9", "ENSMUSG00000069273_H3c6", "ENSMUSG00000069272_H2ac8", 
#> "ENSMUSG00000051627_H1f4", "ENSMUSG00000069310_H3c3", "ENSMUSG00000006715_Gmnn", 
#> "ENSMUSG00000021377_Dek", "ENSMUSG00000022033_Pbk", "ENSMUSG00000022034_Esco2", 
#> "ENSMUSG00000022360_Atad2", "ENSMUSG00000023004_Tuba1b", "ENSMUSG00000007050_Lsm2", 
#> "ENSMUSG00000001525_Tubb5", "ENSMUSG00000024590_Lmnb1", "ENSMUSG00000024795_Kif20b", 
#> "ENSMUSG00000012443_Kif11", "ENSMUSG00000024660_Incenp")
# Clean up and start over on next chunk
# NOTE(review): `reducedDim<-` without a `type` argument appears to remove only
# one stored reduced dimension (presumably the first, "corrected"), so the
# "UMAP" entry may remain on `sce` -- confirm whether `reducedDims(sce)` was
# meant to be cleared entirely.
reducedDim(sce) <- NULL
sce$cluster <- NULL

Clustering and UMAP

Back when we did this analysis we used sctransform for feature selection, but it later turned out that sctransform is not fully reproducible across machines; see for example https://github.com/satijalab/sctransform/issues/68

As we based the shRNA screen on exactly this analysis we provide below a dput of the HVGs returned by this initial analysis, as running it now on a different machine produces slightly different results.

# Code for feature selection via sctransform
# This chunk is never executed (guarded by `if (FALSE)`); it is kept as a
# record of how the hard-coded `tophvg_final` vector below was originally
# obtained.
if (FALSE) {

  # Run sctransform per genotype, regressing batch (=replicate)
  # Returns, per genotype, the gene attribute table sorted by decreasing
  # residual variance.
  sct_ranked <- mclapply(levels(sce$genotype), mc.cores = mc_workers, function(x) {
    s <- sce[, sce$genotype %in% x]
    # One row per cell; the sample id serves as the batch variable
    cell_attr <- data.frame(batch = factor(as.character(s$sample)), row.names = colnames(s))
    bv <- "batch"

    set.seed(2020)
    i <- suppressMessages(sctransform::vst(
      umi = assay(s, "counts"),
      method = "glmGamPoi",
      verbosity = 0,
      batch_var = bv,
      cell_attr = cell_attr,
      return_gene_attr = TRUE
    )$gene_attr)

    return(i[order(-i$residual_variance), ])
  })
  names(sct_ranked) <- levels(sce$genotype)

  # data.frame with ordered residual variance filtering
  # (rank vs. residual variance per genotype, truncated to ranks below 10000)
  sctransform_vst <- lapply(X = names(sct_ranked), FUN = function(x) {
    y <- sct_ranked[[x]]
    m <- data.frame(Seq = seq(1, nrow(y)), resVar = y$residual_variance)
    m$Genotype <- factor(x)
    return(m[m$Seq < 10000, ])
  }) %>% do.call(rbind, .)

  invisible(gc())

  # Combine the top 1000 HVGs per genotype and remove the confounding genes identified above
  ul <- unlist(lapply(sct_ranked, function(x) head(rownames(x), 1000)))
  tophvg_final <- setdiff(unique(ul), confounding_genes$name)
}

# These are the highly variable genes returned by sctransform in the original analysis (run on a MacBook),
# as described in the introductory text of this chunk.
tophvg_final <- c(
  "ENSMUSG00000020125_Elane", "ENSMUSG00000056054_S100a8", "ENSMUSG00000027556_Car1",
  "ENSMUSG00000029373_Pf4", "ENSMUSG00000094248_Hist1h2ao", "ENSMUSG00000062456_Rpl9-ps6",
  "ENSMUSG00000066315_Gm12918", "ENSMUSG00000113948_Rpl17-ps3",
  "ENSMUSG00000070343_Gm10288", "ENSMUSG00000056071_S100a9", "ENSMUSG00000084416_Rpl10a-ps1",
  "ENSMUSG00000068240_Gm11808", "ENSMUSG00000069972_Rps13-ps2",
  "ENSMUSG00000024610_Cd74", "ENSMUSG00000089989_Gm45713", "ENSMUSG00000095597_Rps7-ps3",
  "ENSMUSG00000040314_Ctsg", "ENSMUSG00000036594_H2-Aa", "ENSMUSG00000081406_Rps6-ps4",
  "ENSMUSG00000057729_Prtn3", "ENSMUSG00000055093_Gm8430", "ENSMUSG00000057036_Gm7536",
  "ENSMUSG00000050299_Gm9843", "ENSMUSG00000081604_Gm11518", "ENSMUSG00000058603_Rpl28-ps1",
  "ENSMUSG00000025491_Ifitm1", "ENSMUSG00000025014_Dntt", "ENSMUSG00000022584_Ly6c2",
  "ENSMUSG00000009350_Mpo", "ENSMUSG00000100755_Rps23-ps1", "ENSMUSG00000027447_Cst3",
  "ENSMUSG00000022157_Mcpt8", "ENSMUSG00000002985_Apoe", "ENSMUSG00000060586_H2-Eb1",
  "ENSMUSG00000056399_Prss34", "ENSMUSG00000100862_Gm10925", "ENSMUSG00000024681_Ms4a3",
  "ENSMUSG00000073421_H2-Ab1", "ENSMUSG00000001865_Cpa3", "ENSMUSG00000084349_Rpl3-ps1",
  "ENSMUSG00000069682_Gm10275", "ENSMUSG00000073940_Hbb-bt", "ENSMUSG00000027073_Prg2",
  "ENSMUSG00000027562_Car2", "ENSMUSG00000027072_Prg3", "ENSMUSG00000078126_Rpl23a-ps3",
  "ENSMUSG00000050335_Lgals3", "ENSMUSG00000078087_Rps12l1", "ENSMUSG00000069301_H2ac11",
  "ENSMUSG00000060419_Rps16-ps2", "ENSMUSG00000061167_Rpl15-ps3",
  "ENSMUSG00000101249_Gm29216", "ENSMUSG00000061477_Rps7", "ENSMUSG00000090137_Uba52",
  "ENSMUSG00000020644_Id2", "ENSMUSG00000078377_Gm4294", "ENSMUSG00000092341_Malat1",
  "ENSMUSG00000067870_Rpl31-ps8", "ENSMUSG00000069792_Wfdc17",
  "ENSMUSG00000044258_Ctla2a", "ENSMUSG00000067038_Rps12-ps3",
  "ENSMUSG00000083621_Gm14586", "ENSMUSG00000040466_Blvrb", "ENSMUSG00000024680_Ms4a2",
  "ENSMUSG00000030214_Plbd1", "ENSMUSG00000015437_Gzmb", "ENSMUSG00000051748_Wfdc21",
  "ENSMUSG00000026835_Fcnb", "ENSMUSG00000049775_Tmsb4x", "ENSMUSG00000079523_Tmsb10",
  "ENSMUSG00000006360_Crip1", "ENSMUSG00000076490_Trbc1", "ENSMUSG00000038357_Camp",
  "ENSMUSG00000052234_Epx", "ENSMUSG00000055553_Kxd1", "ENSMUSG00000025351_Cd63",
  "ENSMUSG00000080921_Rpl38-ps2", "ENSMUSG00000057262_Rpl15-ps6",
  "ENSMUSG00000040809_Chil3", "ENSMUSG00000030470_Csrp3", "ENSMUSG00000005339_Fcer1a",
  "ENSMUSG00000032484_Ngp", "ENSMUSG00000116835_Gm49594", "ENSMUSG00000028266_Lmo4",
  "ENSMUSG00000102070_Gm28661", "ENSMUSG00000000682_Cd52", "ENSMUSG00000054203_Ifi205",
  "ENSMUSG00000105388_Rpl36a-ps2", "ENSMUSG00000004612_Nkg7", "ENSMUSG00000050621_Rps27rt",
  "ENSMUSG00000034634_Ly6d", "ENSMUSG00000035042_Ccl5", "ENSMUSG00000069919_Hba-a1",
  "ENSMUSG00000106037_Gm4332", "ENSMUSG00000031765_Mt1", "ENSMUSG00000068220_Lgals1",
  "ENSMUSG00000080859_Rpl10-ps1", "ENSMUSG00000071516_H2ac13",
  "ENSMUSG00000031762_Mt2", "ENSMUSG00000023571_C1qtnf12", "ENSMUSG00000029322_Plac8",
  "ENSMUSG00000039109_F13a1", "ENSMUSG00000069270_H2ac6", "ENSMUSG00000086583_Gm15500",
  "ENSMUSG00000078193_Gm2000", "ENSMUSG00000069309_Hist1h2an",
  "ENSMUSG00000110679_Rpl10-ps5", "ENSMUSG00000030413_Pglyrp1",
  "ENSMUSG00000079419_Ms4a6c", "ENSMUSG00000073002_Vamp5", "ENSMUSG00000031722_Hp",
  "ENSMUSG00000100801_Gm15459", "ENSMUSG00000017002_Slpi", "ENSMUSG00000064357_mt-Atp6",
  "ENSMUSG00000003882_Il7r", "ENSMUSG00000052435_Cebpe", "ENSMUSG00000106106_CT010467.1",
  "ENSMUSG00000024397_Aif1", "ENSMUSG00000058135_Gstm1", "ENSMUSG00000001020_S100a4",
  "ENSMUSG00000053063_Clec12a", "ENSMUSG00000022018_Rgcc", "ENSMUSG00000003949_Hlf",
  "ENSMUSG00000030579_Tyrobp", "ENSMUSG00000024399_Ltb", "ENSMUSG00000085442_Gm3362",
  "ENSMUSG00000015937_Macroh2a1", "ENSMUSG00000044285_Ubb-ps",
  "ENSMUSG00000085723_Gm15915", "ENSMUSG00000018819_Lsp1", "ENSMUSG00000063412_Gm10131",
  "ENSMUSG00000026630_Batf3", "ENSMUSG00000045954_Cavin2", "ENSMUSG00000057666_Gapdh",
  "ENSMUSG00000028644_Ermap", "ENSMUSG00000117338_Gm49804", "ENSMUSG00000076757_Trgc4",
  "ENSMUSG00000005474_Myl10", "ENSMUSG00000106588_Gm17590", "ENSMUSG00000001025_S100a6",
  "ENSMUSG00000068407_Rnase12", "ENSMUSG00000038179_Slamf7", "ENSMUSG00000081094_Rpl19-ps11",
  "ENSMUSG00000020077_Srgn", "ENSMUSG00000025163_Cd7", "ENSMUSG00000020857_Nme2",
  "ENSMUSG00000087412_Gm15501", "ENSMUSG00000084106_Gm6136", "ENSMUSG00000076498_Trbc2",
  "ENSMUSG00000034855_Cxcl10", "ENSMUSG00000000782_Tcf7", "ENSMUSG00000032786_Alas1",
  "ENSMUSG00000069302_H2ac12", "ENSMUSG00000076749_Trgc1", "ENSMUSG00000069014_Gm5641",
  "ENSMUSG00000058715_Fcer1g", "ENSMUSG00000094724_Rnaset2b", "ENSMUSG00000078122_F630028O10Rik",
  "ENSMUSG00000021728_Emb", "ENSMUSG00000052305_Hbb-bs", "ENSMUSG00000025492_Ifitm3",
  "ENSMUSG00000003814_Calr", "ENSMUSG00000096006_Gm21596", "ENSMUSG00000066362_Rps13-ps1",
  "ENSMUSG00000047844_Bex4", "ENSMUSG00000058443_Rpl10-ps3", "ENSMUSG00000069516_Lyz2",
  "ENSMUSG00000041481_Serpina3g", "ENSMUSG00000072601_Ear1", "ENSMUSG00000030851_Ldhc",
  "ENSMUSG00000044678_Ly6k", "ENSMUSG00000071052_Rpl7a-ps5", "ENSMUSG00000095687_Rnaset2a",
  "ENSMUSG00000026822_Lcn2", "ENSMUSG00000022504_Ciita", "ENSMUSG00000001270_Ckb",
  "ENSMUSG00000039209_Rpl39l", "ENSMUSG00000015932_Dstn", "ENSMUSG00000015053_Gata2",
  "ENSMUSG00000054072_Iigp1", "ENSMUSG00000079017_Ifi27l2a", "ENSMUSG00000032575_Manf",
  "ENSMUSG00000026728_Vim", "ENSMUSG00000032323_Cyp11a1", "ENSMUSG00000076431_Sox4",
  "ENSMUSG00000004655_Aqp1", "ENSMUSG00000020323_Prss57", "ENSMUSG00000098915_Rpl15-ps2",
  "ENSMUSG00000093798_Gm8355", "ENSMUSG00000044734_Serpinb1a",
  "ENSMUSG00000063556_Gm10132", "ENSMUSG00000053541_Gm4759", "ENSMUSG00000034459_Ifit1",
  "ENSMUSG00000093674_Rpl41", "ENSMUSG00000053310_Nrgn", "ENSMUSG00000004207_Psap",
  "ENSMUSG00000110841_Gpx4-ps2", "ENSMUSG00000023995_Tspo2", "ENSMUSG00000091086_Rpl6l",
  "ENSMUSG00000081051_Gm15427", "ENSMUSG00000032238_Rora", "ENSMUSG00000016494_Cd34",
  "ENSMUSG00000002565_Scin", "ENSMUSG00000074607_Tox2", "ENSMUSG00000075602_Ly6a",
  "ENSMUSG00000031628_Casp3", "ENSMUSG00000030148_Clec4a2", "ENSMUSG00000022309_Angpt1",
  "ENSMUSG00000026536_Ifi211", "ENSMUSG00000059776_Rpl13-ps6",
  "ENSMUSG00000026285_Pdcd1", "ENSMUSG00000076617_Ighm", "ENSMUSG00000027907_S100a11",
  "ENSMUSG00000072596_Ear2", "ENSMUSG00000063286_Gm8995", "ENSMUSG00000072692_Rpl37rt",
  "ENSMUSG00000039997_Ifi203", "ENSMUSG00000026864_Hspa5", "ENSMUSG00000034664_Itga2b",
  "ENSMUSG00000071532_Gm10335", "ENSMUSG00000005583_Mef2c", "ENSMUSG00000031785_Adgrg1",
  "ENSMUSG00000052212_Cd177", "ENSMUSG00000066068_Gm13611", "ENSMUSG00000038393_Txnip",
  "ENSMUSG00000029484_Anxa3", "ENSMUSG00000060509_Xcr1", "ENSMUSG00000041754_Trem3",
  "ENSMUSG00000081855_Rpl17-ps5", "ENSMUSG00000113149_Gm49383",
  "ENSMUSG00000068129_Cst7", "ENSMUSG00000085342_Gm12254", "ENSMUSG00000031838_Ifi30",
  "ENSMUSG00000001930_Vwf", "ENSMUSG00000018102_H2bc4", "ENSMUSG00000032221_Mns1",
  "ENSMUSG00000078350_Smim1", "ENSMUSG00000032503_Arpp21", "ENSMUSG00000024659_Anxa1",
  "ENSMUSG00000021423_Ly86", "ENSMUSG00000059461_Gm7331", "ENSMUSG00000020048_Hsp90b1",
  "ENSMUSG00000064354_mt-Co2", "ENSMUSG00000052681_Rap1b", "ENSMUSG00000024953_Prdx5",
  "ENSMUSG00000054191_Klf1", "ENSMUSG00000059498_Fcgr3", "ENSMUSG00000041959_S100a10",
  "ENSMUSG00000030165_Klrd1", "ENSMUSG00000017144_Rnd3", "ENSMUSG00000028825_Rhd",
  "ENSMUSG00000106549_Gm42653", "ENSMUSG00000047675_Rps8", "ENSMUSG00000029530_Ccr9",
  "ENSMUSG00000113625_Gm3379", "ENSMUSG00000024014_Pim1", "ENSMUSG00000050761_Gp1bb",
  "ENSMUSG00000030785_Cox6a2", "ENSMUSG00000030342_Cd9", "ENSMUSG00000039167_Adgrl4",
  "ENSMUSG00000030654_Arl6ip1", "ENSMUSG00000029413_Naaa", "ENSMUSG00000012405_Rpl15",
  "ENSMUSG00000078606_Gm4070", "ENSMUSG00000021880_Rnase6", "ENSMUSG00000021268_Meg3",
  "ENSMUSG00000054626_Xlr", "ENSMUSG00000057098_Ebf1", "ENSMUSG00000078921_Tgtp2",
  "ENSMUSG00000011752_Pgam1", "ENSMUSG00000031167_Rbm3", "ENSMUSG00000044533_Rps2",
  "ENSMUSG00000100210_H3c7", "ENSMUSG00000101609_Kcnq1ot1", "ENSMUSG00000062270_Morf4l1",
  "ENSMUSG00000007892_Rplp1", "ENSMUSG00000081087_Rps15a-ps7",
  "ENSMUSG00000033427_Upb1", "ENSMUSG00000025290_Rps24", "ENSMUSG00000020044_Timp3",
  "ENSMUSG00000032496_Ltf", "ENSMUSG00000004032_Gstm5", "ENSMUSG00000026418_Tnni1",
  "ENSMUSG00000083716_Gm13436", "ENSMUSG00000042817_Flt3", "ENSMUSG00000070713_Gm10282",
  "ENSMUSG00000082585_Gm15387", "ENSMUSG00000045868_Gvin1", "ENSMUSG00000027669_Gnb4",
  "ENSMUSG00000069267_H3c2", "ENSMUSG00000031877_Ces2g", "ENSMUSG00000091228_Gm20390",
  "ENSMUSG00000008843_Cldn13", "ENSMUSG00000060962_Dmkn", "ENSMUSG00000060198_Gm11353",
  "ENSMUSG00000027533_Fabp5", "ENSMUSG00000026672_Optn", "ENSMUSG00000029616_Erp29",
  "ENSMUSG00000031125_3830403N18Rik", "ENSMUSG00000031639_Tlr3",
  "ENSMUSG00000020641_Rsad2", "ENSMUSG00000069917_Hba-a2", "ENSMUSG00000074896_Ifit3",
  "ENSMUSG00000067719_Gm10221", "ENSMUSG00000046805_Mpeg1", "ENSMUSG00000019122_Ccl9",
  "ENSMUSG00000010406_Mrpl52", "ENSMUSG00000043770_Gm12481", "ENSMUSG00000029713_Gnb2",
  "ENSMUSG00000079139_Gm4204", "ENSMUSG00000049124_Gm8186", "ENSMUSG00000020160_Meis1",
  "ENSMUSG00000086841_2410006H16Rik", "ENSMUSG00000042476_Abcb4",
  "ENSMUSG00000003038_Hmgn2", "ENSMUSG00000048251_Bcl11b", "ENSMUSG00000022708_Zbtb20",
  "ENSMUSG00000116648_Rpl31-ps12", "ENSMUSG00000023993_Treml1",
  "ENSMUSG00000035004_Igsf6", "ENSMUSG00000060860_Ube2s", "ENSMUSG00000027962_Vcam1",
  "ENSMUSG00000029580_Actb", "ENSMUSG00000041362_Shtn1", "ENSMUSG00000048534_Jaml",
  "ENSMUSG00000028648_Ndufs5", "ENSMUSG00000062727_H2bc12", "ENSMUSG00000001281_Itgb7",
  "ENSMUSG00000022148_Fyb", "ENSMUSG00000060550_H2-Q7", "ENSMUSG00000022456_Septin3",
  "ENSMUSG00000030707_Coro1a", "ENSMUSG00000075705_Msrb1", "ENSMUSG00000032359_Ctsh",
  "ENSMUSG00000063171_Rps4l", "ENSMUSG00000032265_Tent5a", "ENSMUSG00000026921_Egfl7",
  "ENSMUSG00000047867_Gimap6", "ENSMUSG00000046080_Clec9a", "ENSMUSG00000060803_Gstp1",
  "ENSMUSG00000055148_Klf2", "ENSMUSG00000043263_Ifi209", "ENSMUSG00000038845_Phb",
  "ENSMUSG00000003032_Klf4", "ENSMUSG00000044703_Phf11a", "ENSMUSG00000020120_Plek",
  "ENSMUSG00000046908_Ltb4r1", "ENSMUSG00000097467_Gm26737", "ENSMUSG00000045817_Zfp36l2",
  "ENSMUSG00000060377_Rpl36a-ps1", "ENSMUSG00000075031_H2bc3",
  "ENSMUSG00000030220_Arhgdib", "ENSMUSG00000096255_Dynlt1b", "ENSMUSG00000054404_Slfn5",
  "ENSMUSG00000020571_Pdia6", "ENSMUSG00000091955_Gm9844", "ENSMUSG00000031584_Gsr",
  "ENSMUSG00000038900_Rpl12", "ENSMUSG00000021360_Gcnt2", "ENSMUSG00000089929_Bcl2a1b",
  "ENSMUSG00000074604_Mgst2", "ENSMUSG00000117869_Snhg4", "ENSMUSG00000061878_Sphk1",
  "ENSMUSG00000071141_Rpl36a-ps3", "ENSMUSG00000099974_Bcl2a1d",
  "ENSMUSG00000049103_Ccr2", "ENSMUSG00000027360_Hdc", "ENSMUSG00000005611_Mrvi1",
  "ENSMUSG00000025997_Ikzf2", "ENSMUSG00000115938_Gm17241", "ENSMUSG00000064351_mt-Co1",
  "ENSMUSG00000092074_Dynlt1a", "ENSMUSG00000046402_Rbp1", "ENSMUSG00000037849_Ifi206",
  "ENSMUSG00000029810_Tmem176b", "ENSMUSG00000024480_Ap3s1", "ENSMUSG00000031074_Fgf3",
  "ENSMUSG00000090164_BC035044", "ENSMUSG00000099583_H3c4", "ENSMUSG00000029417_Cxcl9",
  "ENSMUSG00000073555_Gm4951", "ENSMUSG00000000983_Wfdc18", "ENSMUSG00000051811_Cox6b2",
  "ENSMUSG00000026815_Gfi1b", "ENSMUSG00000032218_Ccnb2", "ENSMUSG00000023927_Satb1",
  "ENSMUSG00000028159_Dapp1", "ENSMUSG00000031494_Cd209a", "ENSMUSG00000030122_Ptms",
  "ENSMUSG00000103168_Gm30948", "ENSMUSG00000060073_Psma3", "ENSMUSG00000028124_Gclm",
  "ENSMUSG00000020900_Myh10", "ENSMUSG00000061991_H2ac10", "ENSMUSG00000020009_Ifngr1",
  "ENSMUSG00000022769_Sdf2l1", "ENSMUSG00000032231_Anxa2", "ENSMUSG00000029596_Sdsl",
  "ENSMUSG00000062582_Rpl30-ps8", "ENSMUSG00000064339_mt-Rnr2",
  "ENSMUSG00000033355_Rtp4", "ENSMUSG00000052534_Pbx1", "ENSMUSG00000026238_Ptma",
  "ENSMUSG00000032353_Tmed3", "ENSMUSG00000030577_Cd22", "ENSMUSG00000000579_Dynlt1c",
  "ENSMUSG00000008496_Pou2f2", "ENSMUSG00000021675_F2rl2", "ENSMUSG00000109509_Rps12-ps4",
  "ENSMUSG00000036172_Cd200r3", "ENSMUSG00000069769_Msi2", "ENSMUSG00000006362_Cbfa2t3",
  "ENSMUSG00000022820_Ndufb4", "ENSMUSG00000030144_Clec4d", "ENSMUSG00000009687_Fxyd5",
  "ENSMUSG00000051723_Rpl31-ps13", "ENSMUSG00000045658_Pid1", "ENSMUSG00000056888_Glipr1",
  "ENSMUSG00000025130_P4hb", "ENSMUSG00000000740_Rpl13", "ENSMUSG00000040938_Slc16a11",
  "ENSMUSG00000036887_C1qa", "ENSMUSG00000038418_Egr1", "ENSMUSG00000048490_Nrip1",
  "ENSMUSG00000026581_Sell", "ENSMUSG00000073902_Gm1966", "ENSMUSG00000064246_Chil1",
  "ENSMUSG00000028410_Dnaja1", "ENSMUSG00000060131_Atp8b4", "ENSMUSG00000030336_Cd27",
  "ENSMUSG00000025480_Syce1", "ENSMUSG00000016756_Cmah", "ENSMUSG00000031996_Aplp2",
  "ENSMUSG00000058126_Tpm3-rs7", "ENSMUSG00000085241_Snhg3", "ENSMUSG00000032294_Pkm",
  "ENSMUSG00000094989_Rpl9-ps4", "ENSMUSG00000006519_Cyba", "ENSMUSG00000004552_Ctse",
  "ENSMUSG00000064215_Ifi27", "ENSMUSG00000079547_H2-DMb1", "ENSMUSG00000047965_Rpl9-ps7",
  "ENSMUSG00000083496_Gm11263", "ENSMUSG00000020601_Trib2", "ENSMUSG00000060981_H4c8",
  "ENSMUSG00000053332_Gas5", "ENSMUSG00000101972_H3c11", "ENSMUSG00000034160_Ogt",
  "ENSMUSG00000037742_Eef1a1", "ENSMUSG00000034723_Tmx4", "ENSMUSG00000022587_Ly6e",
  "ENSMUSG00000069041_Slc25a31", "ENSMUSG00000033213_AA467197",
  "ENSMUSG00000059336_Slc14a1", "ENSMUSG00000074578_Zfas1", "ENSMUSG00000031162_Gata1",
  "ENSMUSG00000015217_Hmgb3", "ENSMUSG00000028037_Ifi44", "ENSMUSG00000003355_Fkbp11",
  "ENSMUSG00000027248_Pdia3", "ENSMUSG00000090942_F830016B08Rik",
  "ENSMUSG00000044424_Gm9493", "ENSMUSG00000048376_F2r", "ENSMUSG00000041431_Ccnb1",
  "ENSMUSG00000068396_Rpl34-ps1", "ENSMUSG00000048752_Prss50",
  "ENSMUSG00000045826_Ptprcap", "ENSMUSG00000102051_Ly6a2", "ENSMUSG00000024924_Vldlr",
  "ENSMUSG00000027364_Usp50", "ENSMUSG00000058975_Kcnc1", "ENSMUSG00000116504_I730030J21Rik",
  "ENSMUSG00000024511_Rab27b", "ENSMUSG00000008682_Rpl10", "ENSMUSG00000033952_Aspm",
  "ENSMUSG00000081485_Gm12338", "ENSMUSG00000038421_Fcrla", "ENSMUSG00000024975_Pdcd4",
  "ENSMUSG00000034353_Ramp1", "ENSMUSG00000098371_Gm28037", "ENSMUSG00000002204_Napsa",
  "ENSMUSG00000029752_Asns", "ENSMUSG00000057841_Rpl32", "ENSMUSG00000066543_Rpl17-ps9",
  "ENSMUSG00000032946_Rasgrp2", "ENSMUSG00000008668_Rps18", "ENSMUSG00000051504_Siglech",
  "ENSMUSG00000000982_Ccl3", "ENSMUSG00000030688_Stard10", "ENSMUSG00000015340_Cybb",
  "ENSMUSG00000037169_Mycn", "ENSMUSG00000018930_Ccl4", "ENSMUSG00000068600_Gml2",
  "ENSMUSG00000049037_Clec4a1", "ENSMUSG00000030291_Med21", "ENSMUSG00000036469_Marchf1",
  "ENSMUSG00000063564_Col23a1", "ENSMUSG00000000486_Septin1", "ENSMUSG00000038642_Ctss",
  "ENSMUSG00000017723_Wfdc2", "ENSMUSG00000032254_Kif23", "ENSMUSG00000078974_Sec61g",
  "ENSMUSG00000021281_Tnfaip2", "ENSMUSG00000022824_Muc13", "ENSMUSG00000000861_Bcl11a",
  "ENSMUSG00000013523_Bcas1", "ENSMUSG00000090272_Mndal", "ENSMUSG00000023828_Slc22a3",
  "ENSMUSG00000075010_AW112010", "ENSMUSG00000024121_Atp6v0c",
  "ENSMUSG00000037336_Mfsd2b", "ENSMUSG00000027593_Raly", "ENSMUSG00000060678_H4c3",
  "ENSMUSG00000000303_Cdh1", "ENSMUSG00000032518_Rpsa", "ENSMUSG00000028843_Sh3bgrl3",
  "ENSMUSG00000006389_Mpl", "ENSMUSG00000106926_Rpl7-ps7", "ENSMUSG00000105652_4930519L02Rik",
  "ENSMUSG00000037095_Lrg1", "ENSMUSG00000085787_Gm13092", "ENSMUSG00000060063_Alox5ap",
  "ENSMUSG00000041329_Atp1b2", "ENSMUSG00000025094_Slc18a2", "ENSMUSG00000064358_mt-Co3",
  "ENSMUSG00000063694_Cycs", "ENSMUSG00000028234_Rps20", "ENSMUSG00000026837_Col5a1",
  "ENSMUSG00000056124_B4galt6", "ENSMUSG00000107061_Gm19590", "ENSMUSG00000041272_Tox",
  "ENSMUSG00000055639_Dach1", "ENSMUSG00000044550_Tceal3", "ENSMUSG00000106918_Mrpl33",
  "ENSMUSG00000081738_Hmgb1-ps2", "ENSMUSG00000030147_Clec4b1",
  "ENSMUSG00000005413_Hmox1", "ENSMUSG00000036896_C1qc", "ENSMUSG00000032698_Lmo2",
  "ENSMUSG00000009633_G0s2", "ENSMUSG00000044220_Nkx2-3", "ENSMUSG00000021998_Lcp1",
  "ENSMUSG00000032028_Nxpe2", "ENSMUSG00000041836_Ptpre", "ENSMUSG00000096001_2610528A11Rik",
  "ENSMUSG00000059159_Gm8129", "ENSMUSG00000017493_Igfbp4", "ENSMUSG00000027009_Itga4",
  "ENSMUSG00000046916_Myct1", "ENSMUSG00000069833_Ahnak", "ENSMUSG00000026483_Fam129a",
  "ENSMUSG00000043091_Tuba1c", "ENSMUSG00000095677_Dynlt1f", "ENSMUSG00000034868_Myl12b",
  "ENSMUSG00000030345_Dyrk4", "ENSMUSG00000031799_Tpm4", "ENSMUSG00000052854_Nrk",
  "ENSMUSG00000079137_Rpl27-ps1", "ENSMUSG00000057135_Scimp", "ENSMUSG00000090136_Gm10177",
  "ENSMUSG00000058624_Gda", "ENSMUSG00000031391_L1cam", "ENSMUSG00000026126_Ptpn18",
  "ENSMUSG00000023367_Tmem176a", "ENSMUSG00000033177_Tmprss7",
  "ENSMUSG00000028381_Ugcg", "ENSMUSG00000001739_Cldn15", "ENSMUSG00000096210_H1f0",
  "ENSMUSG00000035248_Tut7", "ENSMUSG00000073490_Ifi207", "ENSMUSG00000024227_Pdzph1",
  "ENSMUSG00000053168_9030619P08Rik", "ENSMUSG00000061232_H2-K1",
  "ENSMUSG00000087107_AI662270", "ENSMUSG00000068466_Gm5518", "ENSMUSG00000107383_Gm4366",
  "ENSMUSG00000104982_Gm32554", "ENSMUSG00000087326_Gm12503", "ENSMUSG00000029658_Wdr95",
  "ENSMUSG00000037649_H2-DMa", "ENSMUSG00000022025_Cnmd", "ENSMUSG00000067344_Rps25-ps1",
  "ENSMUSG00000031097_Tnni2", "ENSMUSG00000056394_Lig1", "ENSMUSG00000030432_Rpl28",
  "ENSMUSG00000029622_Arpc1b", "ENSMUSG00000049625_Tifab", "ENSMUSG00000028063_Lmna",
  "ENSMUSG00000006356_Crip2", "ENSMUSG00000022742_Cpox", "ENSMUSG00000022010_Tsc22d1",
  "ENSMUSG00000011148_Adssl1", "ENSMUSG00000031229_Atrx", "ENSMUSG00000024675_Ms4a4c",
  "ENSMUSG00000041736_Tspo", "ENSMUSG00000002111_Spi1", "ENSMUSG00000036185_Sapcd1",
  "ENSMUSG00000025701_Alox5", "ENSMUSG00000057329_Bcl2", "ENSMUSG00000091649_Phf11b",
  "ENSMUSG00000084111_Gm15710", "ENSMUSG00000086503_Xist", "ENSMUSG00000048442_Smim5",
  "ENSMUSG00000053317_Sec61b", "ENSMUSG00000006442_Srm", "ENSMUSG00000085791_Rpl30-ps9",
  "ENSMUSG00000066406_Akap13", "ENSMUSG00000050075_Gpr171", "ENSMUSG00000117570_Gm38410",
  "ENSMUSG00000035472_Slc25a21", "ENSMUSG00000073412_Lst1", "ENSMUSG00000024896_Minpp1",
  "ENSMUSG00000037062_Sh3glb1", "ENSMUSG00000007480_Mc5r", "ENSMUSG00000026234_Ncl",
  "ENSMUSG00000029923_Rab19", "ENSMUSG00000044927_H1f10", "ENSMUSG00000096883_Shisa8",
  "ENSMUSG00000063524_Eno1", "ENSMUSG00000040229_Gpr34", "ENSMUSG00000052684_Jun",
  "ENSMUSG00000057163_Prss2", "ENSMUSG00000101111_Gm28437", "ENSMUSG00000020460_Rps27a",
  "ENSMUSG00000026358_Rgs1", "ENSMUSG00000040274_Cdk6", "ENSMUSG00000000184_Ccnd2",
  "ENSMUSG00000021065_Fut8", "ENSMUSG00000095609_Gm21188", "ENSMUSG00000066363_Serpina3f",
  "ENSMUSG00000051212_Gpr183", "ENSMUSG00000015889_Lta4h", "ENSMUSG00000032870_Smap2",
  "ENSMUSG00000032766_Gng11", "ENSMUSG00000036905_C1qb", "ENSMUSG00000075702_Selenom",
  "ENSMUSG00000046532_Ar", "ENSMUSG00000067767_Clec4b2", "ENSMUSG00000026450_Chit1",
  "ENSMUSG00000020577_Tspan13", "ENSMUSG00000033910_Gucy1a1", "ENSMUSG00000033307_Mif",
  "ENSMUSG00000052565_H1f3", "ENSMUSG00000022240_Ctnnd2", "ENSMUSG00000061132_Blnk",
  "ENSMUSG00000035692_Isg15", "ENSMUSG00000095217_H2bc15", "ENSMUSG00000024065_Ehd3",
  "ENSMUSG00000031586_Rbpms", "ENSMUSG00000099391_1700003C15Rik",
  "ENSMUSG00000026721_Rabgap1l", "ENSMUSG00000070348_Ccnd1", "ENSMUSG00000067274_Rplp0",
  "ENSMUSG00000037628_Cdkn3", "ENSMUSG00000009585_Apobec3", "ENSMUSG00000047181_Samd14",
  "ENSMUSG00000029167_Ppargc1a", "ENSMUSG00000023868_Pde10a", "ENSMUSG00000029838_Ptn",
  "ENSMUSG00000075269_Bex6", "ENSMUSG00000031007_Atp6ap2", "ENSMUSG00000027776_Il12a",
  "ENSMUSG00000026357_Rgs18", "ENSMUSG00000036908_Unc93b1", "ENSMUSG00000016256_Ctsz",
  "ENSMUSG00000040128_Pnrc1", "ENSMUSG00000091844_Gm8251", "ENSMUSG00000086925_Gm6286",
  "ENSMUSG00000060791_Gmfg", "ENSMUSG00000056758_Hmga2", "ENSMUSG00000069308_Hist1h2bp",
  "ENSMUSG00000066902_Rps23-ps2", "ENSMUSG00000022876_Samsn1",
  "ENSMUSG00000028565_Nfia", "ENSMUSG00000025980_Hspd1", "ENSMUSG00000003418_St8sia6",
  "ENSMUSG00000117446_Gm35551", "ENSMUSG00000020315_Sptbn1", "ENSMUSG00000007039_Ddah2",
  "ENSMUSG00000040699_Limd2", "ENSMUSG00000058838_Rps27a-ps2",
  "ENSMUSG00000054428_Atpif1", "ENSMUSG00000001123_Lgals9", "ENSMUSG00000028716_Pdzk1ip1",
  "ENSMUSG00000058385_H2bc8", "ENSMUSG00000061983_Rps12", "ENSMUSG00000022885_St6gal1",
  "ENSMUSG00000004642_Slbp", "ENSMUSG00000019982_Myb", "ENSMUSG00000020396_Nefh",
  "ENSMUSG00000000753_Serpinf1", "ENSMUSG00000054892_Txk", "ENSMUSG00000058818_Pirb",
  "ENSMUSG00000064147_Rab44", "ENSMUSG00000018474_Chd3", "ENSMUSG00000002578_Ikzf4",
  "ENSMUSG00000008540_Mgst1", "ENSMUSG00000028654_Mycl", "ENSMUSG00000030867_Plk1",
  "ENSMUSG00000034028_Cd226", "ENSMUSG00000016194_Hsd11b1", "ENSMUSG00000064337_mt-Rnr1",
  "ENSMUSG00000041571_Selenow", "ENSMUSG00000027435_Cd93", "ENSMUSG00000028393_Alad",
  "ENSMUSG00000054641_Mmrn1", "ENSMUSG00000024164_C3", "ENSMUSG00000099764_Rps10-ps2",
  "ENSMUSG00000045690_Wdr89", "ENSMUSG00000049422_Chchd10", "ENSMUSG00000024338_Psmb8",
  "ENSMUSG00000042675_Ypel3", "ENSMUSG00000050232_Cxcr3", "ENSMUSG00000059775_Rps26-ps1",
  "ENSMUSG00000030159_Clec1b", "ENSMUSG00000028005_Gucy1b1", "ENSMUSG00000052336_Cx3cr1",
  "ENSMUSG00000049036_Tmem121", "ENSMUSG00000047246_H2bc6", "ENSMUSG00000026872_Zeb2",
  "ENSMUSG00000056671_Prelid2", "ENSMUSG00000044783_Hjurp", "ENSMUSG00000028497_Hacd4",
  "ENSMUSG00000057696_Rpl30-ps1", "ENSMUSG00000030365_Clec2i",
  "ENSMUSG00000030659_Nucb2", "ENSMUSG00000071337_Tia1", "ENSMUSG00000005125_Ndrg1",
  "ENSMUSG00000000594_Gm2a", "ENSMUSG00000031170_Slc38a5", "ENSMUSG00000040447_Spns2",
  "ENSMUSG00000025780_Itih5", "ENSMUSG00000004610_Etfb", "ENSMUSG00000022797_Tfrc",
  "ENSMUSG00000005161_Prdx2", "ENSMUSG00000072620_Slfn2", "ENSMUSG00000081378_Rps13-ps4",
  "ENSMUSG00000026950_Neb", "ENSMUSG00000053886_Sh2d4a", "ENSMUSG00000002058_Unc119",
  "ENSMUSG00000043873_Chil5", "ENSMUSG00000035929_H2-Q4", "ENSMUSG00000040430_Pitpnc1",
  "ENSMUSG00000022108_Itm2b", "ENSMUSG00000064023_Klk8", "ENSMUSG00000040152_Thbs1",
  "ENSMUSG00000039910_Cited2", "ENSMUSG00000068227_Il2rb", "ENSMUSG00000026235_Epha4",
  "ENSMUSG00000059108_Ifitm6", "ENSMUSG00000028248_Pnisr", "ENSMUSG00000046434_Hnrnpa1",
  "ENSMUSG00000019505_Ubb", "ENSMUSG00000096950_Gm9530", "ENSMUSG00000034647_Ankrd12",
  "ENSMUSG00000034764_1700006J14Rik", "ENSMUSG00000052713_Zfp608",
  "ENSMUSG00000025647_Shisa5", "ENSMUSG00000028581_Laptm5", "ENSMUSG00000028378_Ptgr1",
  "ENSMUSG00000091478_Gm10039", "ENSMUSG00000025289_Prdx4", "ENSMUSG00000022014_Epsti1",
  "ENSMUSG00000039236_Isg20", "ENSMUSG00000026458_Ppfia4", "ENSMUSG00000021102_Glrx5",
  "ENSMUSG00000021270_Hsp90aa1", "ENSMUSG00000115025_Gm4240", "ENSMUSG00000091957_Rps2-ps10",
  "ENSMUSG00000017309_Cd300lg", "ENSMUSG00000000244_Tspan32", "ENSMUSG00000044162_Tnip3",
  "ENSMUSG00000044252_Osbpl1a", "ENSMUSG00000028214_Gem", "ENSMUSG00000084235_Gm15421",
  "ENSMUSG00000102189_Gm37194", "ENSMUSG00000026021_Sumo1", "ENSMUSG00000116988_Gm49673",
  "ENSMUSG00000030717_Nupr1", "ENSMUSG00000074785_Plxnc1", "ENSMUSG00000019874_Fabp7",
  "ENSMUSG00000053318_Slamf8", "ENSMUSG00000074417_Gm14548", "ENSMUSG00000060591_Ifitm2",
  "ENSMUSG00000032126_Hmbs", "ENSMUSG00000031997_Trpc6", "ENSMUSG00000034993_Vat1",
  "ENSMUSG00000043993_2900052L18Rik", "ENSMUSG00000090733_Rps27",
  "ENSMUSG00000024533_Spire1", "ENSMUSG00000027999_Pla2g12a", "ENSMUSG00000030798_Cd37",
  "ENSMUSG00000002068_Ccne1", "ENSMUSG00000068706_Gm10250", "ENSMUSG00000031146_Plp2",
  "ENSMUSG00000001986_Gria3", "ENSMUSG00000073409_H2-Q6", "ENSMUSG00000056290_Ms4a4b",
  "ENSMUSG00000032034_Kcnj5", "ENSMUSG00000074403_H3c13", "ENSMUSG00000001014_Icam4",
  "ENSMUSG00000052160_Pld4", "ENSMUSG00000042524_Sun2", "ENSMUSG00000048163_Selplg",
  "ENSMUSG00000020808_Pimreg", "ENSMUSG00000041890_Git2", "ENSMUSG00000024052_Lpin2",
  "ENSMUSG00000021025_Nfkbia", "ENSMUSG00000050071_Bex1", "ENSMUSG00000055069_Rab39",
  "ENSMUSG00000112808_Gm4739", "ENSMUSG00000025001_Hells", "ENSMUSG00000057113_Npm1",
  "ENSMUSG00000040528_Milr1", "ENSMUSG00000026249_Serpine2", "ENSMUSG00000029591_Ung",
  "ENSMUSG00000027330_Cdc25b", "ENSMUSG00000080776_Gm12174", "ENSMUSG00000118161_Rps2-ps8",
  "ENSMUSG00000030246_Ldhb", "ENSMUSG00000030157_Clec2d", "ENSMUSG00000041020_Map7d2",
  "ENSMUSG00000021758_Ddx4", "ENSMUSG00000105501_5330426L24Rik",
  "ENSMUSG00000095427_Rps2-ps6", "ENSMUSG00000019828_Grm1", "ENSMUSG00000040751_Lat2",
  "ENSMUSG00000030651_Art2b", "ENSMUSG00000018377_Vezf1", "ENSMUSG00000029561_Oasl2",
  "ENSMUSG00000028457_Atp8b5", "ENSMUSG00000101013_A630072M18Rik",
  "ENSMUSG00000047139_Cd24a", "ENSMUSG00000024456_Diaph1", "ENSMUSG00000026074_Map4k4",
  "ENSMUSG00000032584_Mst1r", "ENSMUSG00000104605_Gm42922", "ENSMUSG00000040681_Hmgn1",
  "ENSMUSG00000038489_Polr2l", "ENSMUSG00000025498_Irf7", "ENSMUSG00000024677_Ms4a6b",
  "ENSMUSG00000001128_Cfp", "ENSMUSG00000030742_Lat", "ENSMUSG00000059743_Fdps",
  "ENSMUSG00000023067_Cdkn1a", "ENSMUSG00000051639_Fbl-ps2", "ENSMUSG00000033713_Foxn3",
  "ENSMUSG00000043102_Qrfp", "ENSMUSG00000014599_Csf1", "ENSMUSG00000024910_Ctsw",
  "ENSMUSG00000021760_Gpx8", "ENSMUSG00000017009_Sdc4", "ENSMUSG00000104955_1700016F12Rik",
  "ENSMUSG00000058927_Gm10053", "ENSMUSG00000033082_Clec1a", "ENSMUSG00000027203_Dut",
  "ENSMUSG00000053477_Tcf4", "ENSMUSG00000062901_Klhl24", "ENSMUSG00000015355_Cd48",
  "ENSMUSG00000031870_Pgr", "ENSMUSG00000034833_Tespa1", "ENSMUSG00000003420_Fcgrt",
  "ENSMUSG00000022659_Gcsam", "ENSMUSG00000018362_Kpna2", "ENSMUSG00000084085_Gm16140",
  "ENSMUSG00000004897_Hdgf", "ENSMUSG00000115338_Pnp", "ENSMUSG00000061390_Uba52-ps",
  "ENSMUSG00000035569_Ankrd11", "ENSMUSG00000079293_Clec7a", "ENSMUSG00000025486_Sirt3",
  "ENSMUSG00000026385_Dbi", "ENSMUSG00000039145_Camk1d", "ENSMUSG00000036181_H1f2",
  "ENSMUSG00000062825_Actg1", "ENSMUSG00000053907_Mat2a", "ENSMUSG00000068349_Gml",
  "ENSMUSG00000042606_Hirip3", "ENSMUSG00000022952_Runx1", "ENSMUSG00000038623_Tm6sf1",
  "ENSMUSG00000099927_Gm8226", "ENSMUSG00000067147_Rpl7a-ps11",
  "ENSMUSG00000071637_Cebpd", "ENSMUSG00000030325_Klrb1c", "ENSMUSG00000082292_Gm12250",
  "ENSMUSG00000042770_Hebp1", "ENSMUSG00000113061_Rps18-ps5", "ENSMUSG00000020737_Jpt1",
  "ENSMUSG00000112605_Gm7476", "ENSMUSG00000029673_Auts2", "ENSMUSG00000074800_Gm4149",
  "ENSMUSG00000032966_Fkbp1a", "ENSMUSG00000035085_1700020L24Rik",
  "ENSMUSG00000020846_Rflnb", "ENSMUSG00000015312_Gadd45b", "ENSMUSG00000052727_Map1b",
  "ENSMUSG00000050473_Slc35d3", "ENSMUSG00000038508_Gdf15", "ENSMUSG00000022475_Hdac7",
  "ENSMUSG00000002028_Kmt2a", "ENSMUSG00000066407_Gm10263", "ENSMUSG00000026712_Mrc1",
  "ENSMUSG00000026938_Fcna", "ENSMUSG00000081058_H3c15", "ENSMUSG00000005672_Kit",
  "ENSMUSG00000069793_Slfn9", "ENSMUSG00000031780_Ccl17", "ENSMUSG00000024965_Fermt3",
  "ENSMUSG00000107176_Gm9794", "ENSMUSG00000102332_Gm19331", "ENSMUSG00000060572_Mfap2",
  "ENSMUSG00000067212_H2-T23", "ENSMUSG00000044867_Gimap1os", "ENSMUSG00000026580_Selp",
  "ENSMUSG00000030844_Rgs10", "ENSMUSG00000042750_Bex2", "ENSMUSG00000082901_Rps8-ps5",
  "ENSMUSG00000023944_Hsp90ab1", "ENSMUSG00000042207_Kdm5b", "ENSMUSG00000025804_Ccr1",
  "ENSMUSG00000030094_Xpc", "ENSMUSG00000078671_Chd2", "ENSMUSG00000020732_Rab37",
  "ENSMUSG00000031613_Hpgd", "ENSMUSG00000081740_Gm14279", "ENSMUSG00000038872_Zfhx3",
  "ENSMUSG00000028961_Pgd", "ENSMUSG00000027342_Pcna", "ENSMUSG00000040732_Erg",
  "ENSMUSG00000092274_Neat1", "ENSMUSG00000079563_Pglyrp2", "ENSMUSG00000026360_Rgs2",
  "ENSMUSG00000105867_Gm42517", "ENSMUSG00000110902_Gm33104", "ENSMUSG00000002014_Ssr4",
  "ENSMUSG00000005533_Igf1r", "ENSMUSG00000036461_Elf1", "ENSMUSG00000117442_1810073O08Rik",
  "ENSMUSG00000037664_Cdkn1c", "ENSMUSG00000037138_Aff3", "ENSMUSG00000036186_Dipk1b",
  "ENSMUSG00000002107_Celf2", "ENSMUSG00000071303_Rps8-ps1", "ENSMUSG00000042757_Tmem108",
  "ENSMUSG00000039196_Orm1", "ENSMUSG00000022651_Retnlg", "ENSMUSG00000005054_Cstb",
  "ENSMUSG00000071561_Cstdc5", "ENSMUSG00000029359_Tesc", "ENSMUSG00000033981_Gria2",
  "ENSMUSG00000022225_Cma1", "ENSMUSG00000021702_Thbs4", "ENSMUSG00000024245_Tmem178",
  "ENSMUSG00000013974_Mcemp1", "ENSMUSG00000071562_Stfa1", "ENSMUSG00000071715_Ncf4",
  "ENSMUSG00000036256_Igfbp7", "ENSMUSG00000072940_Gm10443", "ENSMUSG00000041324_Inhba",
  "ENSMUSG00000022582_Ly6g", "ENSMUSG00000000157_Itgb2l", "ENSMUSG00000022026_Olfm4",
  "ENSMUSG00000074272_Ceacam1", "ENSMUSG00000096054_Syne1", "ENSMUSG00000067149_Jchain",
  "ENSMUSG00000029304_Spp1", "ENSMUSG00000033220_Rac2", "ENSMUSG00000046727_Cystm1",
  "ENSMUSG00000046180_4930550L24Rik", "ENSMUSG00000063856_Gpx1",
  "ENSMUSG00000019987_Arg1", "ENSMUSG00000059956_Serpinb12", "ENSMUSG00000082475_Gm7206",
  "ENSMUSG00000024907_Gal", "ENSMUSG00000028603_Scp2", "ENSMUSG00000019851_Perp",
  "ENSMUSG00000031397_Tktl1", "ENSMUSG00000106874_Gm20186", "ENSMUSG00000053279_Aldh1a1",
  "ENSMUSG00000045034_Ankrd34b", "ENSMUSG00000117380_E430002N23Rik",
  "ENSMUSG00000030711_Sult1a1", "ENSMUSG00000033083_Tbc1d4", "ENSMUSG00000026070_Il18r1",
  "ENSMUSG00000023132_Gzma", "ENSMUSG00000021879_Dnah12", "ENSMUSG00000022037_Clu",
  "ENSMUSG00000030142_Clec4e", "ENSMUSG00000039081_Zfp503", "ENSMUSG00000025064_Col17a1",
  "ENSMUSG00000079186_Gzmc", "ENSMUSG00000053846_Lipg", "ENSMUSG00000089901_Gm8113",
  "ENSMUSG00000003379_Cd79a", "ENSMUSG00000054169_Ceacam10", "ENSMUSG00000030231_Plekha5",
  "ENSMUSG00000078922_Tgtp1", "ENSMUSG00000047953_Gp5", "ENSMUSG00000019194_Scn1b",
  "ENSMUSG00000087075_Lbhd2", "ENSMUSG00000075420_Smim6", "ENSMUSG00000055817_Mta3",
  "ENSMUSG00000068806_Olfr1259", "ENSMUSG00000015950_Ncf1", "ENSMUSG00000082141_Gm11212",
  "ENSMUSG00000022528_Hes1", "ENSMUSG00000027611_Procr", "ENSMUSG00000029372_Ppbp",
  "ENSMUSG00000054435_Gimap4", "ENSMUSG00000062210_Tnfaip8", "ENSMUSG00000042745_Id1",
  "ENSMUSG00000001225_Slc26a3", "ENSMUSG00000030775_Trat1", "ENSMUSG00000098240_Gm4575",
  "ENSMUSG00000052396_Mogat2", "ENSMUSG00000023333_Gcm1", "ENSMUSG00000091472_Gm3739",
  "ENSMUSG00000022378_Fam49b", "ENSMUSG00000004864_Mapk13", "ENSMUSG00000116876_Gm49721",
  "ENSMUSG00000020407_Upp1", "ENSMUSG00000046447_Camk2n1", "ENSMUSG00000047880_Cxcr5",
  "ENSMUSG00000096472_Cdkn2d", "ENSMUSG00000049577_Zfpm1", "ENSMUSG00000028184_Adgrl2",
  "ENSMUSG00000066682_Pilrb2", "ENSMUSG00000051839_Gypa", "ENSMUSG00000050552_Lamtor4",
  "ENSMUSG00000051022_Hs3st1", "ENSMUSG00000054871_Tmem158", "ENSMUSG00000117485_Gm19696",
  "ENSMUSG00000002033_Cd3g", "ENSMUSG00000061143_Maml3", "ENSMUSG00000039899_Fgl2",
  "ENSMUSG00000056025_Clca3a1", "ENSMUSG00000030786_Itgam", "ENSMUSG00000070803_Cited4",
  "ENSMUSG00000059970_Hspa2", "ENSMUSG00000085629_Gm11697", "ENSMUSG00000029661_Col1a2",
  "ENSMUSG00000066026_Dhrs3", "ENSMUSG00000117098_Gm49909", "ENSMUSG00000026922_Agpat2",
  "ENSMUSG00000118196_Gm29946", "ENSMUSG00000047361_Gm973", "ENSMUSG00000064109_Hcst",
  "ENSMUSG00000074874_Ctla2b", "ENSMUSG00000049907_Rasl11b", "ENSMUSG00000036427_Gpi1",
  "ENSMUSG00000038903_Ccdc68", "ENSMUSG00000029379_Cxcl3", "ENSMUSG00000042700_Sipa1l1",
  "ENSMUSG00000021200_Asb2", "ENSMUSG00000029275_Gfi1", "ENSMUSG00000040747_Cd53",
  "ENSMUSG00000038463_Olfml2b", "ENSMUSG00000029096_Htra3", "ENSMUSG00000032311_Nrg4",
  "ENSMUSG00000040026_Saa3", "ENSMUSG00000029915_Clec5a", "ENSMUSG00000045569_Mc2r",
  "ENSMUSG00000022443_Myh9", "ENSMUSG00000062393_Dgkk", "ENSMUSG00000030399_Ckm",
  "ENSMUSG00000021701_Plk2", "ENSMUSG00000042751_Nmnat2", "ENSMUSG00000038893_Fam117a",
  "ENSMUSG00000017737_Mmp9", "ENSMUSG00000031451_Gas6", "ENSMUSG00000027221_Chst1",
  "ENSMUSG00000032905_Atg12", "ENSMUSG00000020609_Apob", "ENSMUSG00000035227_Spcs2",
  "ENSMUSG00000034607_Pof1b", "ENSMUSG00000074141_Il4i1", "ENSMUSG00000035021_Baz1a",
  "ENSMUSG00000016024_Lbp", "ENSMUSG00000054619_Mettl7a1", "ENSMUSG00000103800_Pcdha8",
  "ENSMUSG00000049149_Olfr1258", "ENSMUSG00000018593_Sparc", "ENSMUSG00000026594_Ralgps2",
  "ENSMUSG00000097221_1810049J17Rik", "ENSMUSG00000049313_Sorl1",
  "ENSMUSG00000026604_Ptpn14", "ENSMUSG00000036816_Atoh7", "ENSMUSG00000104793_Gm43756",
  "ENSMUSG00000028717_Tal1", "ENSMUSG00000067235_H2-Q10", "ENSMUSG00000058799_Nap1l1",
  "ENSMUSG00000026069_Il1rl1", "ENSMUSG00000049130_C5ar1", "ENSMUSG00000073676_Hspe1",
  "ENSMUSG00000020388_Pdlim4", "ENSMUSG00000118653_AC159819.1",
  "ENSMUSG00000074336_Apoc4", "ENSMUSG00000046959_Slc26a1", "ENSMUSG00000026009_Icos",
  "ENSMUSG00000101739_Gm5733", "ENSMUSG00000067613_Krt83", "ENSMUSG00000051650_B3gnt2",
  "ENSMUSG00000042354_Gnl3", "ENSMUSG00000057058_Skap1", "ENSMUSG00000046841_Ckap4",
  "ENSMUSG00000032556_Bfsp2", "ENSMUSG00000055435_Maf", "ENSMUSG00000026893_Gca",
  "ENSMUSG00000037710_Cisd1", "ENSMUSG00000076609_Igkc", "ENSMUSG00000045763_Basp1",
  "ENSMUSG00000027239_Mdk", "ENSMUSG00000027808_Serp1", "ENSMUSG00000029866_Kel",
  "ENSMUSG00000023926_Rhag", "ENSMUSG00000086320_Gm12840", "ENSMUSG00000001946_Esam",
  "ENSMUSG00000021614_Vcan", "ENSMUSG00000041859_Mcm3", "ENSMUSG00000025245_Lztfl1",
  "ENSMUSG00000031355_Arhgap6", "ENSMUSG00000093507_Gm20627", "ENSMUSG00000104876_Trdc",
  "ENSMUSG00000021676_Iqgap2", "ENSMUSG00000034881_Tbxa2r", "ENSMUSG00000008193_Spib",
  "ENSMUSG00000030671_Pde3b", "ENSMUSG00000021591_Glrx", "ENSMUSG00000032011_Thy1",
  "ENSMUSG00000035000_Dpp4", "ENSMUSG00000038037_Socs1", "ENSMUSG00000055322_Tns1",
  "ENSMUSG00000030054_Gp9", "ENSMUSG00000038227_Hoxa9", "ENSMUSG00000024587_Nars",
  "ENSMUSG00000039542_Ncam1", "ENSMUSG00000038604_Ripor1", "ENSMUSG00000061603_Akap6",
  "ENSMUSG00000098650_Commd1b", "ENSMUSG00000040462_Os9", "ENSMUSG00000071415_Rpl23",
  "ENSMUSG00000025746_Il6", "ENSMUSG00000040822_1700123O20Rik",
  "ENSMUSG00000025104_Hdgfl3", "ENSMUSG00000027496_Aurka", "ENSMUSG00000027636_Sla2",
  "ENSMUSG00000056643_Chst13", "ENSMUSG00000024620_Pdgfrb", "ENSMUSG00000025724_Sec11a",
  "ENSMUSG00000063415_Cyp26b1", "ENSMUSG00000043207_Zmpste24",
  "ENSMUSG00000074269_Rec114", "ENSMUSG00000039529_Atp8b1", "ENSMUSG00000043557_Mdga1",
  "ENSMUSG00000027199_Gatm", "ENSMUSG00000116615_4930548J01Rik",
  "ENSMUSG00000044629_Cnrip1", "ENSMUSG00000027763_Mbnl1", "ENSMUSG00000063232_Serpina11",
  "ENSMUSG00000044055_Otos", "ENSMUSG00000024608_Rps14", "ENSMUSG00000024793_Tnfrsf25",
  "ENSMUSG00000076755_Trgv1", "ENSMUSG00000042284_Itga1", "ENSMUSG00000033554_Dph5",
  "ENSMUSG00000027347_Rasgrp1", "ENSMUSG00000029204_Rhoh", "ENSMUSG00000025503_Taldo1",
  "ENSMUSG00000102154_Gm37469", "ENSMUSG00000012422_Tmem167", "ENSMUSG00000019817_Plagl1",
  "ENSMUSG00000046245_Pilra", "ENSMUSG00000096719_Mrgpra2b", "ENSMUSG00000027030_Stk39",
  "ENSMUSG00000027331_Knstrn", "ENSMUSG00000105945_Gm43570", "ENSMUSG00000030559_Rab38",
  "ENSMUSG00000022892_App", "ENSMUSG00000030000_Add2", "ENSMUSG00000071714_Csf2rb2",
  "ENSMUSG00000034755_Pcdh11x", "ENSMUSG00000028228_Cpne3", "ENSMUSG00000063450_Syne2",
  "ENSMUSG00000028927_Padi2", "ENSMUSG00000020484_Xbp1", "ENSMUSG00000033192_Lpcat2",
  "ENSMUSG00000001763_Tspan33", "ENSMUSG00000020717_Pecam1", "ENSMUSG00000025777_Gdap1",
  "ENSMUSG00000104068_Gm37199", "ENSMUSG00000062646_Ganc", "ENSMUSG00000006589_Aprt",
  "ENSMUSG00000028464_Tpm2", "ENSMUSG00000030617_Ccdc83", "ENSMUSG00000051339_2900026A02Rik",
  "ENSMUSG00000049350_Zg16", "ENSMUSG00000020689_Itgb3", "ENSMUSG00000015316_Slamf1",
  "ENSMUSG00000022226_Mcpt2", "ENSMUSG00000099980_Gm5619", "ENSMUSG00000041355_Ssr2",
  "ENSMUSG00000029860_Zyx", "ENSMUSG00000029553_Tfec", "ENSMUSG00000026473_Glul",
  "ENSMUSG00000075701_Selenos", "ENSMUSG00000090019_Gimap1", "ENSMUSG00000071547_Nt5dc2",
  "ENSMUSG00000036523_Greb1", "ENSMUSG00000022868_Ahsg", "ENSMUSG00000086763_Plxna4os1",
  "ENSMUSG00000066672_Olfr417", "ENSMUSG00000056091_St3gal5", "ENSMUSG00000069873_4930438A08Rik",
  "ENSMUSG00000026655_Fam107b", "ENSMUSG00000023010_Tmbim6", "ENSMUSG00000034936_Arl4d",
  "ENSMUSG00000028332_Hemgn", "ENSMUSG00000004473_Clec11a", "ENSMUSG00000040998_Npnt",
  "ENSMUSG00000004665_Cnn2", "ENSMUSG00000049493_Pls1", "ENSMUSG00000085912_Trp53cor1",
  "ENSMUSG00000037235_Mxd4", "ENSMUSG00000004961_Syt5", "ENSMUSG00000022346_Myc",
  "ENSMUSG00000021485_Mxd3", "ENSMUSG00000085786_Gm15987", "ENSMUSG00000104806_Gm42566",
  "ENSMUSG00000015843_Rxrg", "ENSMUSG00000032657_Fam189b", "ENSMUSG00000042462_Dctpp1",
  "ENSMUSG00000034957_Cebpa", "ENSMUSG00000068606_Gm4841", "ENSMUSG00000021403_Serpinb9b",
  "ENSMUSG00000116652_B830017H08Rik", "ENSMUSG00000026463_Atp2b4",
  "ENSMUSG00000018920_Cxcl16", "ENSMUSG00000029563_Foxp2", "ENSMUSG00000031497_Tnfsf13b"
)

# Integration ----
# Run fastMNN on the rows listed in `tophvg_final`, treating each sample as a
# batch and merging in the order given by `integration_order`. Only the
# "corrected" reduced dimension of the result is stored back on `sce`.
set.seed(2020)
mnn_result <- batchelor::fastMNN(
  sce[tophvg_final, ],
  batch = sce$sample,
  merge.order = integration_order
)
reducedDim(sce, "corrected") <- reducedDim(mnn_result, "corrected")
rm(mnn_result)

# Clustering ----
# Build a shared-nearest-neighbour graph (k = 10, Jaccard weighting) on the
# corrected embedding, then take the Louvain community membership as the
# per-cell cluster label.
set.seed(2020)
snn_graph <- scran::buildSNNGraph(
  sce,
  use.dimred = "corrected",
  type = "jaccard",
  k = 10
)
louvain_fit <- igraph::cluster_louvain(snn_graph)
sce$cluster <- factor(louvain_fit$membership)
rm(snn_graph, louvain_fit)

# UMAP ----
# Re-seed so the embedding does not depend on RNG draws made above.
set.seed(2020)
sce <- scater::runUMAP(
  sce,
  dimred = "corrected",
  min_dist = 0.5,
  spread = 0.5
)

Check for doublets. When we first ran this analysis we did not set an RNGseed; the scDblFinder manual now explicitly states that this is required to make the results reproducible. Hence, we provide a dput of the original result of this analysis.

# Doublet detection with scDblFinder, kept for reference only: the whole block
# is wrapped in `if (FALSE)` and is therefore never executed. It records two
# variants of the call, the one originally run and the one that would have
# been reproducible. A hard-coded vector of doublet cells from the original
# run is defined right after this block.
if (FALSE) {

  # Variant 1: how the analysis was originally (incorrectly) run, relying on
  # set.seed() for reproducibility.
  # NOTE(review): this variant is described as the run that lacked an RNGseed,
  # yet the BiocParallel param below does pass RNGseed = 1 -- confirm which
  # form was actually used at the time.
  lb <- length(unique(sce$sample))
  # Cap the number of workers at the number of distinct samples.
  bp <- BiocParallel::MulticoreParam(ifelse(mc_workers > lb, lb, mc_workers), RNGseed = 1) # RNGseed is critical for reproducibility

  set.seed(2020)
  sce <- scDblFinder::scDblFinder(
    sce = sce, clusters = as.character(sce$cluster),
    samples = sce$sample, BPPARAM = bp
  )

  # Variant 2: how it would have been correct. Seeding is done through RNGseed
  # on the BiocParallel param instead of set.seed(), and clusters/samples are
  # passed by name ("cluster", "sample") rather than as vectors.
  lb <- length(unique(sce$sample))
  bp <- BiocParallel::MulticoreParam(ifelse(mc_workers > lb, lb, mc_workers), RNGseed = 1)
  sce <- scDblFinder(sce, clusters = "cluster", samples = "sample", BPPARAM = bp)
}

# Here the dput of the original output of scDblFinder with cells blacklisted as doublets
is_doublet_cells <- c(
  "URE_rep1_cell233", "URE_rep1_cell1914", "URE_rep1_cell3963",
  "URE_rep1_cell4143", "URE_rep2_cell634", "WT_rep1_cell19", "WT_rep1_cell540",
  "WT_rep1_cell1826", "WT_rep1_cell1898", "WT_rep2_cell7", "URE_rep1_cell162",
  "URE_rep1_cell163", "URE_rep1_cell167", "URE_rep1_cell170", "URE_rep1_cell194",
  "URE_rep1_cell195", "URE_rep1_cell197", "URE_rep1_cell206", "URE_rep1_cell207",
  "URE_rep1_cell209", "URE_rep1_cell210", "URE_rep1_cell222", "URE_rep1_cell227",
  "URE_rep1_cell240", "URE_rep1_cell241", "URE_rep1_cell246", "URE_rep1_cell253",
  "URE_rep1_cell254", "URE_rep1_cell255", "URE_rep1_cell270", "URE_rep1_cell277",
  "URE_rep1_cell278", "URE_rep1_cell299", "URE_rep1_cell302", "URE_rep1_cell315",
  "URE_rep1_cell324", "URE_rep1_cell328", "URE_rep1_cell331", "URE_rep1_cell332",
  "URE_rep1_cell335", "URE_rep1_cell349", "URE_rep1_cell351", "URE_rep1_cell377",
  "URE_rep1_cell378", "URE_rep1_cell379", "URE_rep1_cell381", "URE_rep1_cell384",
  "URE_rep1_cell385", "URE_rep1_cell393", "URE_rep1_cell400", "URE_rep1_cell402",
  "URE_rep1_cell417", "URE_rep1_cell423", "URE_rep1_cell428", "URE_rep1_cell429",
  "URE_rep1_cell436", "URE_rep1_cell441", "URE_rep1_cell442", "URE_rep1_cell446",
  "URE_rep1_cell449", "URE_rep1_cell451", "URE_rep1_cell460", "URE_rep1_cell465",
  "URE_rep1_cell467", "URE_rep1_cell479", "URE_rep1_cell481", "URE_rep1_cell482",
  "URE_rep1_cell484", "URE_rep1_cell488", "URE_rep1_cell491", "URE_rep1_cell500",
  "URE_rep1_cell507", "URE_rep1_cell520", "URE_rep1_cell522", "URE_rep1_cell527",
  "URE_rep1_cell531", "URE_rep1_cell546", "URE_rep1_cell548", "URE_rep1_cell554",
  "URE_rep1_cell555", "URE_rep1_cell556", "URE_rep1_cell570", "URE_rep1_cell577",
  "URE_rep1_cell587", "URE_rep1_cell588", "URE_rep1_cell591", "URE_rep1_cell608",
  "URE_rep1_cell636", "URE_rep1_cell654", "URE_rep1_cell662", "URE_rep1_cell666",
  "URE_rep1_cell668", "URE_rep1_cell671", "URE_rep1_cell680", "URE_rep1_cell683",
  "URE_rep1_cell687", "URE_rep1_cell694", "URE_rep1_cell695", "URE_rep1_cell700",
  "URE_rep1_cell701", "URE_rep1_cell705", "URE_rep1_cell708", "URE_rep1_cell718",
  "URE_rep1_cell719", "URE_rep1_cell721", "URE_rep1_cell726", "URE_rep1_cell743",
  "URE_rep1_cell745", "URE_rep1_cell747", "URE_rep1_cell754", "URE_rep1_cell762",
  "URE_rep1_cell765", "URE_rep1_cell775", "URE_rep1_cell784", "URE_rep1_cell790",
  "URE_rep1_cell793", "URE_rep1_cell797", "URE_rep1_cell798", "URE_rep1_cell800",
  "URE_rep1_cell802", "URE_rep1_cell806", "URE_rep1_cell810", "URE_rep1_cell812",
  "URE_rep1_cell814", "URE_rep1_cell816", "URE_rep1_cell833", "URE_rep1_cell835",
  "URE_rep1_cell846", "URE_rep1_cell850", "URE_rep1_cell866", "URE_rep1_cell874",
  "URE_rep1_cell897", "URE_rep1_cell904", "URE_rep1_cell908", "URE_rep1_cell924",
  "URE_rep1_cell939", "URE_rep1_cell953", "URE_rep1_cell957", "URE_rep1_cell959",
  "URE_rep1_cell971", "URE_rep1_cell972", "URE_rep1_cell979", "URE_rep1_cell986",
  "URE_rep1_cell988", "URE_rep1_cell991", "URE_rep1_cell992", "URE_rep1_cell1001",
  "URE_rep1_cell1002", "URE_rep1_cell1027", "URE_rep1_cell1043",
  "URE_rep1_cell1044", "URE_rep1_cell1052", "URE_rep1_cell1063",
  "URE_rep1_cell1064", "URE_rep1_cell1065", "URE_rep1_cell1074",
  "URE_rep1_cell1076", "URE_rep1_cell1083", "URE_rep1_cell1118",
  "URE_rep1_cell1133", "URE_rep1_cell1139", "URE_rep1_cell1147",
  "URE_rep1_cell1157", "URE_rep1_cell1162", "URE_rep1_cell1167",
  "URE_rep1_cell1204", "URE_rep1_cell1208", "URE_rep1_cell1215",
  "URE_rep1_cell1232", "URE_rep1_cell1258", "URE_rep1_cell1287",
  "URE_rep1_cell1291", "URE_rep1_cell1296", "URE_rep1_cell1302",
  "URE_rep1_cell1343", "URE_rep1_cell1345", "URE_rep1_cell1349",
  "URE_rep1_cell1351", "URE_rep1_cell1366", "URE_rep1_cell1368",
  "URE_rep1_cell1373", "URE_rep1_cell1394", "URE_rep1_cell1406",
  "URE_rep1_cell1423", "URE_rep1_cell1424", "URE_rep1_cell1432",
  "URE_rep1_cell1445", "URE_rep1_cell1452", "URE_rep1_cell1468",
  "URE_rep1_cell1484", "URE_rep1_cell1517", "URE_rep1_cell1523",
  "URE_rep1_cell1525", "URE_rep1_cell1531", "URE_rep1_cell1532",
  "URE_rep1_cell1544", "URE_rep1_cell1563", "URE_rep1_cell1565",
  "URE_rep1_cell1580", "URE_rep1_cell1589", "URE_rep1_cell1590",
  "URE_rep1_cell1625", "URE_rep1_cell1627", "URE_rep1_cell1690",
  "URE_rep1_cell1708", "URE_rep1_cell1721", "URE_rep1_cell1722",
  "URE_rep1_cell1736", "URE_rep1_cell1739", "URE_rep1_cell1750",
  "URE_rep1_cell1768", "URE_rep1_cell1786", "URE_rep1_cell1791",
  "URE_rep1_cell1794", "URE_rep1_cell1802", "URE_rep1_cell1814",
  "URE_rep1_cell1818", "URE_rep1_cell1825", "URE_rep1_cell1826",
  "URE_rep1_cell1835", "URE_rep1_cell1850", "URE_rep1_cell1854",
  "URE_rep1_cell1855", "URE_rep1_cell1863", "URE_rep1_cell1873",
  "URE_rep1_cell1884", "URE_rep1_cell1892", "URE_rep1_cell1910",
  "URE_rep1_cell1917", "URE_rep1_cell1933", "URE_rep1_cell1934",
  "URE_rep1_cell1935", "URE_rep1_cell1945", "URE_rep1_cell1958",
  "URE_rep1_cell1960", "URE_rep1_cell1982", "URE_rep1_cell1987",
  "URE_rep1_cell2025", "URE_rep1_cell2036", "URE_rep1_cell2048",
  "URE_rep1_cell2059", "URE_rep1_cell2064", "URE_rep1_cell2080",
  "URE_rep1_cell2086", "URE_rep1_cell2093", "URE_rep1_cell2100",
  "URE_rep1_cell2116", "URE_rep1_cell2120", "URE_rep1_cell2123",
  "URE_rep1_cell2134", "URE_rep1_cell2142", "URE_rep1_cell2211",
  "URE_rep1_cell2253", "URE_rep1_cell2265", "URE_rep1_cell2284",
  "URE_rep1_cell2301", "URE_rep1_cell2312", "URE_rep1_cell2342",
  "URE_rep1_cell2366", "URE_rep1_cell2394", "URE_rep1_cell2398",
  "URE_rep1_cell2403", "URE_rep1_cell2411", "URE_rep1_cell2455",
  "URE_rep1_cell2472", "URE_rep1_cell2475", "URE_rep1_cell2487",
  "URE_rep1_cell2504", "URE_rep1_cell2516", "URE_rep1_cell2522",
  "URE_rep1_cell2553", "URE_rep1_cell2558", "URE_rep1_cell2559",
  "URE_rep1_cell2567", "URE_rep1_cell2568", "URE_rep1_cell2586",
  "URE_rep1_cell2606", "URE_rep1_cell2626", "URE_rep1_cell2630",
  "URE_rep1_cell2665", "URE_rep1_cell2673", "URE_rep1_cell2675",
  "URE_rep1_cell2722", "URE_rep1_cell2730", "URE_rep1_cell2740",
  "URE_rep1_cell2784", "URE_rep1_cell2802", "URE_rep1_cell2804",
  "URE_rep1_cell2810", "URE_rep1_cell2818", "URE_rep1_cell2856",
  "URE_rep1_cell2867", "URE_rep1_cell2888", "URE_rep1_cell2917",
  "URE_rep1_cell2930", "URE_rep1_cell2940", "URE_rep1_cell2980",
  "URE_rep1_cell3030", "URE_rep1_cell3033", "URE_rep1_cell3082",
  "URE_rep1_cell3128", "URE_rep1_cell3146", "URE_rep1_cell3158",
  "URE_rep1_cell3222", "URE_rep1_cell3383", "URE_rep1_cell3426",
  "URE_rep1_cell3494", "URE_rep1_cell3612", "URE_rep1_cell3631",
  "URE_rep1_cell3737", "URE_rep1_cell3826", "URE_rep1_cell3884",
  "URE_rep1_cell4042", "URE_rep1_cell4140", "URE_rep1_cell4185",
  "URE_rep1_cell4329", "URE_rep1_cell4563", "URE_rep1_cell5104",
  "URE_rep1_cell5703", "URE_rep2_cell362", "URE_rep2_cell365",
  "URE_rep2_cell374", "URE_rep2_cell389", "URE_rep2_cell414", "URE_rep2_cell418",
  "URE_rep2_cell439", "URE_rep2_cell440", "URE_rep2_cell444", "URE_rep2_cell466",
  "URE_rep2_cell470", "URE_rep2_cell474", "URE_rep2_cell476", "URE_rep2_cell479",
  "URE_rep2_cell497", "URE_rep2_cell501", "URE_rep2_cell510", "URE_rep2_cell516",
  "URE_rep2_cell519", "URE_rep2_cell521", "URE_rep2_cell524", "URE_rep2_cell547",
  "URE_rep2_cell559", "URE_rep2_cell563", "URE_rep2_cell576", "URE_rep2_cell583",
  "URE_rep2_cell587", "URE_rep2_cell588", "URE_rep2_cell589", "URE_rep2_cell591",
  "URE_rep2_cell597", "URE_rep2_cell598", "URE_rep2_cell603", "URE_rep2_cell607",
  "URE_rep2_cell611", "URE_rep2_cell612", "URE_rep2_cell613", "URE_rep2_cell619",
  "URE_rep2_cell628", "URE_rep2_cell641", "URE_rep2_cell654", "URE_rep2_cell658",
  "URE_rep2_cell661", "URE_rep2_cell662", "URE_rep2_cell664", "URE_rep2_cell670",
  "URE_rep2_cell674", "URE_rep2_cell679", "URE_rep2_cell694", "URE_rep2_cell697",
  "URE_rep2_cell698", "URE_rep2_cell700", "URE_rep2_cell702", "URE_rep2_cell708",
  "URE_rep2_cell713", "URE_rep2_cell730", "URE_rep2_cell731", "URE_rep2_cell732",
  "URE_rep2_cell736", "URE_rep2_cell739", "URE_rep2_cell748", "URE_rep2_cell751",
  "URE_rep2_cell760", "URE_rep2_cell763", "URE_rep2_cell778", "URE_rep2_cell784",
  "URE_rep2_cell787", "URE_rep2_cell790", "URE_rep2_cell796", "URE_rep2_cell806",
  "URE_rep2_cell808", "URE_rep2_cell809", "URE_rep2_cell811", "URE_rep2_cell818",
  "URE_rep2_cell822", "URE_rep2_cell824", "URE_rep2_cell836", "URE_rep2_cell837",
  "URE_rep2_cell844", "URE_rep2_cell851", "URE_rep2_cell852", "URE_rep2_cell855",
  "URE_rep2_cell865", "URE_rep2_cell867", "URE_rep2_cell868", "URE_rep2_cell871",
  "URE_rep2_cell872", "URE_rep2_cell873", "URE_rep2_cell875", "URE_rep2_cell876",
  "URE_rep2_cell894", "URE_rep2_cell897", "URE_rep2_cell910", "URE_rep2_cell913",
  "URE_rep2_cell915", "URE_rep2_cell918", "URE_rep2_cell927", "URE_rep2_cell936",
  "URE_rep2_cell937", "URE_rep2_cell939", "URE_rep2_cell946", "URE_rep2_cell953",
  "URE_rep2_cell955", "URE_rep2_cell957", "URE_rep2_cell958", "URE_rep2_cell965",
  "URE_rep2_cell978", "URE_rep2_cell986", "URE_rep2_cell987", "URE_rep2_cell989",
  "URE_rep2_cell990", "URE_rep2_cell1006", "URE_rep2_cell1031",
  "URE_rep2_cell1042", "URE_rep2_cell1060", "URE_rep2_cell1063",
  "URE_rep2_cell1076", "URE_rep2_cell1078", "URE_rep2_cell1080",
  "URE_rep2_cell1081", "URE_rep2_cell1084", "URE_rep2_cell1089",
  "URE_rep2_cell1110", "URE_rep2_cell1117", "URE_rep2_cell1131",
  "URE_rep2_cell1147", "URE_rep2_cell1156", "URE_rep2_cell1166",
  "URE_rep2_cell1172", "URE_rep2_cell1177", "URE_rep2_cell1178",
  "URE_rep2_cell1181", "URE_rep2_cell1182", "URE_rep2_cell1192",
  "URE_rep2_cell1195", "URE_rep2_cell1205", "URE_rep2_cell1208",
  "URE_rep2_cell1218", "URE_rep2_cell1220", "URE_rep2_cell1234",
  "URE_rep2_cell1280", "URE_rep2_cell1281", "URE_rep2_cell1291",
  "URE_rep2_cell1303", "URE_rep2_cell1317", "URE_rep2_cell1321",
  "URE_rep2_cell1322", "URE_rep2_cell1324", "URE_rep2_cell1330",
  "URE_rep2_cell1342", "URE_rep2_cell1343", "URE_rep2_cell1376",
  "URE_rep2_cell1389", "URE_rep2_cell1395", "URE_rep2_cell1434",
  "URE_rep2_cell1440", "URE_rep2_cell1441", "URE_rep2_cell1444",
  "URE_rep2_cell1453", "URE_rep2_cell1454", "URE_rep2_cell1455",
  "URE_rep2_cell1459", "URE_rep2_cell1472", "URE_rep2_cell1474",
  "URE_rep2_cell1477", "URE_rep2_cell1482", "URE_rep2_cell1522",
  "URE_rep2_cell1547", "URE_rep2_cell1552", "URE_rep2_cell1555",
  "URE_rep2_cell1563", "URE_rep2_cell1567", "URE_rep2_cell1568",
  "URE_rep2_cell1582", "URE_rep2_cell1587", "URE_rep2_cell1595",
  "URE_rep2_cell1598", "URE_rep2_cell1609", "URE_rep2_cell1618",
  "URE_rep2_cell1631", "URE_rep2_cell1639", "URE_rep2_cell1648",
  "URE_rep2_cell1652", "URE_rep2_cell1653", "URE_rep2_cell1654",
  "URE_rep2_cell1665", "URE_rep2_cell1681", "URE_rep2_cell1693",
  "URE_rep2_cell1697", "URE_rep2_cell1705", "URE_rep2_cell1709",
  "URE_rep2_cell1723", "URE_rep2_cell1725", "URE_rep2_cell1743",
  "URE_rep2_cell1751", "URE_rep2_cell1757", "URE_rep2_cell1773",
  "URE_rep2_cell1774", "URE_rep2_cell1778", "URE_rep2_cell1779",
  "URE_rep2_cell1785", "URE_rep2_cell1791", "URE_rep2_cell1801",
  "URE_rep2_cell1803", "URE_rep2_cell1823", "URE_rep2_cell1836",
  "URE_rep2_cell1837", "URE_rep2_cell1847", "URE_rep2_cell1869",
  "URE_rep2_cell1873", "URE_rep2_cell1877", "URE_rep2_cell1888",
  "URE_rep2_cell1890", "URE_rep2_cell1911", "URE_rep2_cell1928",
  "URE_rep2_cell1985", "URE_rep2_cell1998", "URE_rep2_cell2001",
  "URE_rep2_cell2041", "URE_rep2_cell2053", "URE_rep2_cell2059",
  "URE_rep2_cell2062", "URE_rep2_cell2071", "URE_rep2_cell2082",
  "URE_rep2_cell2090", "URE_rep2_cell2095", "URE_rep2_cell2096",
  "URE_rep2_cell2109", "URE_rep2_cell2136", "URE_rep2_cell2193",
  "URE_rep2_cell2203", "URE_rep2_cell2223", "URE_rep2_cell2275",
  "URE_rep2_cell2302", "URE_rep2_cell2310", "URE_rep2_cell2340",
  "URE_rep2_cell2345", "URE_rep2_cell2378", "URE_rep2_cell2402",
  "URE_rep2_cell2426", "URE_rep2_cell2451", "URE_rep2_cell2461",
  "URE_rep2_cell2472", "URE_rep2_cell2473", "URE_rep2_cell2481",
  "URE_rep2_cell2498", "URE_rep2_cell2517", "URE_rep2_cell2526",
  "URE_rep2_cell2535", "URE_rep2_cell2586", "URE_rep2_cell2619",
  "URE_rep2_cell2629", "URE_rep2_cell2632", "URE_rep2_cell2666",
  "URE_rep2_cell2717", "URE_rep2_cell2759", "URE_rep2_cell2767",
  "URE_rep2_cell2772", "URE_rep2_cell2783", "URE_rep2_cell2802",
  "URE_rep2_cell2804", "URE_rep2_cell2805", "URE_rep2_cell2876",
  "URE_rep2_cell3038", "URE_rep2_cell3062", "URE_rep2_cell3071",
  "URE_rep2_cell3075", "URE_rep2_cell3079", "URE_rep2_cell3101",
  "URE_rep2_cell3109", "URE_rep2_cell3113", "URE_rep2_cell3197",
  "URE_rep2_cell3227", "URE_rep2_cell3233", "URE_rep2_cell3242",
  "URE_rep2_cell3246", "URE_rep2_cell3316", "URE_rep2_cell3334",
  "URE_rep2_cell3399", "URE_rep2_cell3441", "URE_rep2_cell3443",
  "URE_rep2_cell3478", "URE_rep2_cell3483", "URE_rep2_cell3485",
  "URE_rep2_cell3486", "URE_rep2_cell3506", "URE_rep2_cell3541",
  "URE_rep2_cell3615", "URE_rep2_cell3661", "URE_rep2_cell3698",
  "URE_rep2_cell3703", "URE_rep2_cell3705", "URE_rep2_cell3747",
  "URE_rep2_cell3756", "URE_rep2_cell3881", "URE_rep2_cell3909",
  "URE_rep2_cell3933", "URE_rep2_cell3944", "URE_rep2_cell3952",
  "URE_rep2_cell3970", "URE_rep2_cell4016", "URE_rep2_cell4062",
  "URE_rep2_cell4159", "URE_rep2_cell4253", "URE_rep2_cell4393",
  "URE_rep2_cell4413", "URE_rep2_cell4417", "URE_rep2_cell4449",
  "URE_rep2_cell4524", "URE_rep2_cell4648", "URE_rep2_cell4719",
  "URE_rep2_cell4805", "URE_rep2_cell4816", "URE_rep2_cell4927",
  "URE_rep2_cell5364", "URE_rep2_cell5436", "URE_rep2_cell5508",
  "URE_rep2_cell5762", "URE_rep2_cell5854", "URE_rep2_cell5990",
  "URE_rep2_cell7381", "URE_rep2_cell7539", "URE_rep2_cell7607",
  "URE_rep2_cell7811", "URE_rep2_cell7873", "WT_rep1_cell144",
  "WT_rep1_cell229", "WT_rep1_cell243", "WT_rep1_cell263", "WT_rep1_cell275",
  "WT_rep1_cell281", "WT_rep1_cell292", "WT_rep1_cell295", "WT_rep1_cell296",
  "WT_rep1_cell301", "WT_rep1_cell308", "WT_rep1_cell312", "WT_rep1_cell320",
  "WT_rep1_cell332", "WT_rep1_cell334", "WT_rep1_cell339", "WT_rep1_cell346",
  "WT_rep1_cell347", "WT_rep1_cell350", "WT_rep1_cell355", "WT_rep1_cell364",
  "WT_rep1_cell367", "WT_rep1_cell374", "WT_rep1_cell383", "WT_rep1_cell389",
  "WT_rep1_cell398", "WT_rep1_cell404", "WT_rep1_cell429", "WT_rep1_cell438",
  "WT_rep1_cell441", "WT_rep1_cell452", "WT_rep1_cell461", "WT_rep1_cell462",
  "WT_rep1_cell469", "WT_rep1_cell470", "WT_rep1_cell476", "WT_rep1_cell494",
  "WT_rep1_cell513", "WT_rep1_cell516", "WT_rep1_cell532", "WT_rep1_cell535",
  "WT_rep1_cell549", "WT_rep1_cell550", "WT_rep1_cell566", "WT_rep1_cell567",
  "WT_rep1_cell569", "WT_rep1_cell570", "WT_rep1_cell574", "WT_rep1_cell587",
  "WT_rep1_cell594", "WT_rep1_cell597", "WT_rep1_cell598", "WT_rep1_cell600",
  "WT_rep1_cell603", "WT_rep1_cell604", "WT_rep1_cell609", "WT_rep1_cell616",
  "WT_rep1_cell631", "WT_rep1_cell672", "WT_rep1_cell689", "WT_rep1_cell693",
  "WT_rep1_cell721", "WT_rep1_cell743", "WT_rep1_cell760", "WT_rep1_cell782",
  "WT_rep1_cell796", "WT_rep1_cell801", "WT_rep1_cell822", "WT_rep1_cell826",
  "WT_rep1_cell831", "WT_rep1_cell841", "WT_rep1_cell886", "WT_rep1_cell894",
  "WT_rep1_cell901", "WT_rep1_cell902", "WT_rep1_cell903", "WT_rep1_cell908",
  "WT_rep1_cell937", "WT_rep1_cell938", "WT_rep1_cell939", "WT_rep1_cell943",
  "WT_rep1_cell945", "WT_rep1_cell949", "WT_rep1_cell950", "WT_rep1_cell956",
  "WT_rep1_cell962", "WT_rep1_cell976", "WT_rep1_cell984", "WT_rep1_cell994",
  "WT_rep1_cell1015", "WT_rep1_cell1031", "WT_rep1_cell1074", "WT_rep1_cell1094",
  "WT_rep1_cell1095", "WT_rep1_cell1116", "WT_rep1_cell1124", "WT_rep1_cell1148",
  "WT_rep1_cell1160", "WT_rep1_cell1179", "WT_rep1_cell1180", "WT_rep1_cell1199",
  "WT_rep1_cell1209", "WT_rep1_cell1212", "WT_rep1_cell1264", "WT_rep1_cell1281",
  "WT_rep1_cell1283", "WT_rep1_cell1289", "WT_rep1_cell1293", "WT_rep1_cell1301",
  "WT_rep1_cell1332", "WT_rep1_cell1356", "WT_rep1_cell1366", "WT_rep1_cell1375",
  "WT_rep1_cell1409", "WT_rep1_cell1417", "WT_rep1_cell1423", "WT_rep1_cell1437",
  "WT_rep1_cell1450", "WT_rep1_cell1455", "WT_rep1_cell1457", "WT_rep1_cell1491",
  "WT_rep1_cell1492", "WT_rep1_cell1500", "WT_rep1_cell1561", "WT_rep1_cell1588",
  "WT_rep1_cell1602", "WT_rep1_cell1609", "WT_rep1_cell1622", "WT_rep1_cell1627",
  "WT_rep1_cell1645", "WT_rep1_cell1731", "WT_rep1_cell1747", "WT_rep1_cell1759",
  "WT_rep1_cell1803", "WT_rep1_cell1827", "WT_rep1_cell1837", "WT_rep1_cell1847",
  "WT_rep1_cell1883", "WT_rep1_cell1893", "WT_rep1_cell1926", "WT_rep1_cell1927",
  "WT_rep1_cell1970", "WT_rep1_cell1989", "WT_rep1_cell2072", "WT_rep1_cell2085",
  "WT_rep1_cell2107", "WT_rep1_cell2121", "WT_rep1_cell2137", "WT_rep1_cell2141",
  "WT_rep1_cell2174", "WT_rep1_cell2201", "WT_rep1_cell2239", "WT_rep1_cell2263",
  "WT_rep1_cell2267", "WT_rep1_cell2332", "WT_rep1_cell2406", "WT_rep1_cell2470",
  "WT_rep1_cell2478", "WT_rep1_cell2484", "WT_rep1_cell2533", "WT_rep1_cell2552",
  "WT_rep1_cell2565", "WT_rep1_cell2567", "WT_rep1_cell2625", "WT_rep1_cell2636",
  "WT_rep1_cell2647", "WT_rep1_cell2663", "WT_rep1_cell2666", "WT_rep1_cell2697",
  "WT_rep1_cell2700", "WT_rep1_cell2723", "WT_rep1_cell2733", "WT_rep1_cell2745",
  "WT_rep1_cell2790", "WT_rep1_cell2820", "WT_rep1_cell2844", "WT_rep1_cell2876",
  "WT_rep1_cell2898", "WT_rep1_cell2902", "WT_rep1_cell2966", "WT_rep1_cell2970",
  "WT_rep1_cell3000", "WT_rep1_cell3002", "WT_rep1_cell3043", "WT_rep1_cell3061",
  "WT_rep1_cell3063", "WT_rep1_cell3182", "WT_rep1_cell3191", "WT_rep1_cell3235",
  "WT_rep1_cell3262", "WT_rep1_cell3296", "WT_rep1_cell3319", "WT_rep1_cell3337",
  "WT_rep1_cell3477", "WT_rep1_cell3512", "WT_rep1_cell3615", "WT_rep1_cell3666",
  "WT_rep1_cell3688", "WT_rep1_cell3767", "WT_rep1_cell3872", "WT_rep1_cell3971",
  "WT_rep1_cell3977", "WT_rep1_cell3985", "WT_rep1_cell4053", "WT_rep1_cell4153",
  "WT_rep1_cell4210", "WT_rep1_cell4265", "WT_rep1_cell4301", "WT_rep1_cell4930",
  "WT_rep1_cell5123", "WT_rep1_cell5556", "WT_rep2_cell77", "WT_rep2_cell174",
  "WT_rep2_cell175", "WT_rep2_cell196", "WT_rep2_cell199", "WT_rep2_cell221",
  "WT_rep2_cell233", "WT_rep2_cell236", "WT_rep2_cell270", "WT_rep2_cell286",
  "WT_rep2_cell288", "WT_rep2_cell308", "WT_rep2_cell314", "WT_rep2_cell319",
  "WT_rep2_cell323", "WT_rep2_cell329", "WT_rep2_cell333", "WT_rep2_cell334",
  "WT_rep2_cell335", "WT_rep2_cell359", "WT_rep2_cell379", "WT_rep2_cell382",
  "WT_rep2_cell385", "WT_rep2_cell389", "WT_rep2_cell400", "WT_rep2_cell404",
  "WT_rep2_cell409", "WT_rep2_cell410", "WT_rep2_cell411", "WT_rep2_cell423",
  "WT_rep2_cell425", "WT_rep2_cell432", "WT_rep2_cell433", "WT_rep2_cell451",
  "WT_rep2_cell457", "WT_rep2_cell460", "WT_rep2_cell463", "WT_rep2_cell474",
  "WT_rep2_cell476", "WT_rep2_cell494", "WT_rep2_cell511", "WT_rep2_cell523",
  "WT_rep2_cell525", "WT_rep2_cell568", "WT_rep2_cell593", "WT_rep2_cell599",
  "WT_rep2_cell634", "WT_rep2_cell645", "WT_rep2_cell647", "WT_rep2_cell660",
  "WT_rep2_cell666", "WT_rep2_cell677", "WT_rep2_cell681", "WT_rep2_cell686",
  "WT_rep2_cell694", "WT_rep2_cell698", "WT_rep2_cell699", "WT_rep2_cell707",
  "WT_rep2_cell711", "WT_rep2_cell722", "WT_rep2_cell729", "WT_rep2_cell735",
  "WT_rep2_cell743", "WT_rep2_cell763", "WT_rep2_cell765", "WT_rep2_cell769",
  "WT_rep2_cell782", "WT_rep2_cell792", "WT_rep2_cell828", "WT_rep2_cell833",
  "WT_rep2_cell849", "WT_rep2_cell866", "WT_rep2_cell874", "WT_rep2_cell883",
  "WT_rep2_cell894", "WT_rep2_cell909", "WT_rep2_cell916", "WT_rep2_cell923",
  "WT_rep2_cell929", "WT_rep2_cell951", "WT_rep2_cell968", "WT_rep2_cell1005",
  "WT_rep2_cell1062", "WT_rep2_cell1073", "WT_rep2_cell1083", "WT_rep2_cell1091",
  "WT_rep2_cell1117", "WT_rep2_cell1137", "WT_rep2_cell1140", "WT_rep2_cell1184",
  "WT_rep2_cell1186", "WT_rep2_cell1187", "WT_rep2_cell1191", "WT_rep2_cell1199",
  "WT_rep2_cell1200", "WT_rep2_cell1218", "WT_rep2_cell1222", "WT_rep2_cell1236",
  "WT_rep2_cell1245", "WT_rep2_cell1291", "WT_rep2_cell1310", "WT_rep2_cell1321",
  "WT_rep2_cell1329", "WT_rep2_cell1348", "WT_rep2_cell1408", "WT_rep2_cell1420",
  "WT_rep2_cell1486", "WT_rep2_cell1513", "WT_rep2_cell1566", "WT_rep2_cell1584",
  "WT_rep2_cell1585", "WT_rep2_cell1599", "WT_rep2_cell1624", "WT_rep2_cell1634",
  "WT_rep2_cell1650", "WT_rep2_cell1668", "WT_rep2_cell1702", "WT_rep2_cell1713",
  "WT_rep2_cell1730", "WT_rep2_cell1743", "WT_rep2_cell1746", "WT_rep2_cell1780",
  "WT_rep2_cell1781", "WT_rep2_cell1801", "WT_rep2_cell1821", "WT_rep2_cell1832",
  "WT_rep2_cell1844", "WT_rep2_cell1892", "WT_rep2_cell1899", "WT_rep2_cell1971",
  "WT_rep2_cell1972", "WT_rep2_cell1975", "WT_rep2_cell1994", "WT_rep2_cell2048",
  "WT_rep2_cell2069", "WT_rep2_cell2075", "WT_rep2_cell2094", "WT_rep2_cell2121",
  "WT_rep2_cell2134", "WT_rep2_cell2163", "WT_rep2_cell2200", "WT_rep2_cell2204",
  "WT_rep2_cell2284", "WT_rep2_cell2324", "WT_rep2_cell2350", "WT_rep2_cell2351",
  "WT_rep2_cell2379", "WT_rep2_cell2432", "WT_rep2_cell2450", "WT_rep2_cell2524",
  "WT_rep2_cell2608", "WT_rep2_cell2630", "WT_rep2_cell2652", "WT_rep2_cell2709",
  "WT_rep2_cell2714", "WT_rep2_cell2833", "WT_rep2_cell2834", "WT_rep2_cell2893",
  "WT_rep2_cell3065", "WT_rep2_cell3236", "WT_rep2_cell3249", "WT_rep2_cell3359",
  "WT_rep2_cell3494", "WT_rep2_cell3553", "WT_rep2_cell3635", "WT_rep2_cell3695",
  "WT_rep2_cell4114"
)

# One tiny cluster was identified as dendritic cells (DCs). It is removed because DCs are not the
# focus of our study and the cluster holds too few cells to provide meaningful power.
# Show Cd74 expression next to the cluster assignment:
plot_umap(sce, gene = "Cd74") |
  (plot_umap(sce, by = "cluster", text_add = FALSE) + theme(legend.position = "none"))

# Percentage of cells per cluster with non-zero Cd74 logcounts
cd74_all <-
  data.frame(
    cluster = sce$cluster,
    isexpr = as.numeric(assay(sce["ENSMUSG00000024610_Cd74", ], "logcounts")[1, ]) > 0
  ) %>%
  dplyr::group_by(cluster) %>%
  dplyr::summarize(percent = 100 * sum(isexpr) / length(isexpr)) %>%
  data.frame()

# Every cluster in which more than 99% of the cells express Cd74 is treated as DC cluster;
# the join against the per-cell metadata returns the names of all cells in these clusters
is_dc_cells <-
  cd74_all %>%
  dplyr::filter(percent > 99) %>%
  dplyr::select(cluster) %>%
  dplyr::left_join(data.frame(colData(sce), cell_name = colnames(sce)), by = "cluster") %>%
  dplyr::pull(cell_name)

# Cells to discard: the doublets plus the DCs
rm.cells <- c(is_doublet_cells, is_dc_cells)

sce <- sce[, !(colnames(sce) %in% rm.cells)]

# With all unwanted cells removed, produce the final clustering and UMAP for downstream analysis.
# The RNG is seeded directly before the graph-based clustering and again before the UMAP.
set.seed(2020)
snngraph <- scran::buildSNNGraph(sce, use.dimred = "corrected", type = "jaccard", k = 20)

sce$cluster <- factor(igraph::cluster_louvain(snngraph)$membership)
rm(snngraph)

set.seed(2020)
sce <- scater::runUMAP(sce, dimred = "corrected", min_dist = .5, spread = .5)

# We then renamed the clusters post-hoc so they're developmentally chronological.
# The mapping is positional (i-th cluster level -> i-th new name), so it is only valid when the
# clustering returns exactly as many clusters as there are new names. Fail loudly otherwise:
# data.frame() would silently recycle the shorter column whenever the two lengths divide evenly,
# which would mislabel clusters without any error.
new_cluster_names <- c("C2", "C7", "C4", "C5", "C6", "C1", "C3", "C8")
if (nlevels(sce$cluster) != length(new_cluster_names)) {
  stop(
    "Expected ", length(new_cluster_names), " clusters for the post-hoc renaming but found ",
    nlevels(sce$cluster), ".",
    call. = FALSE
  )
}
df.rename <- data.frame(Old = levels(sce$cluster), New = new_cluster_names)
rm(new_cluster_names)

# Factor levels run from C1 up to the highest cluster number so they sort chronologically
sce$cluster <- factor(
  df.rename$New[match(as.character(sce$cluster), df.rename$Old)],
  levels = paste0("C", seq(1, max(as.numeric(gsub("C", "", as.character(df.rename$New))))))
)

# Combined genotype-cluster label, e.g. "WT_C1"
sce$cg <- factor(paste(sce$genotype, sce$cluster, sep = "_"))

# UMAP by cluster:
Figure_1A <- plot_umap(sce, by = "cluster", text_add = TRUE, text_use_label = TRUE, label_size = 5) +
  theme(legend.position = "none") +
  coord_fixed()

Figure_1A

Differential expression analysis

All differential analysis of the single-cell data is done via DESeq2 on pseudobulk level. For this we first aggregate cells per replicate-cluster-genotype into pseudobulks, then normalize this by DESeq2, then test:

  • within WT and URE all vs all clusters to get markers
  • URE vs WT within the same cluster to get genotype-induced DEGs
  • for WT clusters C3 vs C5 to get a

Pseudobulk aggregation and normalization

# Aggregate the cells into pseudobulks, one per combination of cluster, genotype and replicate.
# Column names follow the pattern <genotype>_<cluster>_<replicate>.
aggregate_by <- c("cluster", "genotype", "replicate")
summed <- scuttle::aggregateAcrossCells(sce, id = colData(sce)[, aggregate_by])
colnames(summed) <- paste(summed$genotype, summed$cluster, summed$replicate, sep = "_")

# Per-gene expression percentages for every genotype-cluster group, computed by the project helper
# get_pexpr() on the single-cell counts (presumably percent of expressing cells -- see get_pexpr)
gf <- factor(paste(sce$genotype, sce$cluster, sep = "_"))
pseudobulk_percent_expressed <- get_pexpr(data = assay(sce, "counts"), group = as.character(gf))

# DESeqDataSet with a full-factorial design: one "group" level per genotype-cluster combination,
# obtained by stripping the replicate suffix from the pseudobulk names. Counts are rounded and
# stored as integers.
group_factor <- factor(gsub("_rep.*", "", colnames(summed)))
use_counts <- round(assay(summed, "counts"))
mode(use_counts) <- "integer"

dds_pseudobulk <- DESeq2::DESeqDataSetFromMatrix(
  countData = use_counts,
  colData = DataFrame(
    group = group_factor,
    genotype = factor(summed$genotype),
    cluster = factor(summed$cluster)
  ),
  design = ~group
)

rm(use_counts, summed, group_factor)

# Exclude the clusters with notably different cell numbers between genotypes and drop the
# factor levels that became empty
dds_pseudobulk <- dds_pseudobulk[, !(dds_pseudobulk$group %in% c("WT_C4", "URE_C5"))]
colData(dds_pseudobulk) <- droplevels.data.frame(colData(dds_pseudobulk))

# First prefilter: keep a gene if its value in pseudobulk_percent_expressed is >= 10
# in at least one of the remaining factorial groups
dds_pseudobulk <- dds_pseudobulk[
  rowSums(pseudobulk_percent_expressed[, levels(dds_pseudobulk$group)] >= 10) > 0,
]

# Normalize pseudobulks with DESeq2
dds_pseudobulk <- DESeq2::estimateSizeFactors(dds_pseudobulk)

# rlog-transformed values, used downstream for clustering of the URE vs WT DEGs
rld <- assay(DESeq2::rlog(dds_pseudobulk, blind = FALSE))

DEGs to derive per-cluster markers

Per genotype we test all clusters against each other and then keep as markers the genes overexpressed in a given cluster versus all but one other cluster. This allows a limited overlap of markers while being reasonably strict. The overexpression criteria are FDR < 0.05 and FC > 1.5. We only consider genes expressed in at least 25% of cells of the tested cluster.

lvl_genotype <- levels(dds_pseudobulk$genotype)

# For every genotype: fit a cluster-only model, test every ordered pair of clusters and keep as
# markers of a cluster the genes that pass the cutoffs in all contrasts of that cluster.
# Returns (per genotype) a named list "<genotype>_<cluster>" -> character vector of marker genes.
pseudobulk_markers <- lapply(lvl_genotype, function(g) {

  # Test clusters all vs all
  dds <- dds_pseudobulk[, dds_pseudobulk$genotype %in% g]
  colData(dds) <- droplevels.data.frame(colData(dds))
  design(dds) <- ~cluster
  dds <- DESeq2::DESeq(dds, quiet = TRUE)

  lvl_cluster <- levels(droplevels(dds$cluster))

  # Every unordered pair plus its reverse, so each cluster appears once in second position
  # (co[2], the cluster that is tested for overexpression) against each other cluster
  all_contrasts <- combn(lvl_cluster, 2, function(x) c("cluster", x), simplify = FALSE)
  all_contrasts <- c(all_contrasts, lapply(all_contrasts, function(x) c(x[1], x[3], x[2])))

  markers <- mclapply(all_contrasts, mc.cores = mc_workers, function(co) {
    # lfcShrink() is called without `res`, so it derives the results table for the contrast itself.
    # NOTE(review): a separate results(..., alpha = 0.05) table would only take effect when handed
    # over via `res = `; doing so may alter padj (independent filtering) and hence the marker lists.
    lfc <- as.data.frame(DESeq2::lfcShrink(dds = dds, contrast = co, type = "ashr", quiet = TRUE))

    # which() drops genes with NA statistics instead of producing "NA" placeholder row names
    rn <- rownames(lfc)[which(lfc$log2FoldChange > log2(1.5) & lfc$padj < 0.05)]

    # Only genes with a value >= 25 in pseudobulk_percent_expressed for the tested cluster
    cl <- co[2]
    keep25 <- pseudobulk_percent_expressed[, paste0(g, "_", cl), drop = TRUE] >= 25

    intersect(names(keep25[keep25]), rn)
  })

  # mclapply() returns "try-error" objects for failed workers rather than stopping; without this
  # check their error messages would be tabulated below as if they were gene names
  failed <- vapply(markers, inherits, logical(1), what = "try-error")
  if (any(failed)) {
    stop("Marker testing failed for ", sum(failed), " contrast(s) in genotype ", g, call. = FALSE)
  }

  # We have 7 clusters, so testing 1 versus 6 others and require that a gene must be overexpressed
  # in the given versus all other clusters.
  # NOTE(review): the accompanying text speaks of "all but one other cluster", whereas
  # `tbl == min_n` demands all other clusters -- confirm which one is intended.
  min_n <- length(lvl_cluster) - 1
  markers <- sapply(lvl_cluster, function(cl) {
    u <- which(unlist(lapply(all_contrasts, function(x) x[2] == cl)))
    tbl <- table(unlist(markers[u]))
    m <- tbl == min_n
    names(m[m])
  }, simplify = FALSE)

  names(markers) <- paste0(g, "_", names(markers))

  markers
})

# Flatten the per-genotype lists into one list named "<genotype>_<cluster>"
pseudobulk_markers <- do.call(c, pseudobulk_markers)

lengths(pseudobulk_markers)
#>  WT_C1  WT_C2  WT_C3  WT_C5  WT_C6  WT_C7  WT_C8 URE_C1 URE_C2 URE_C3 URE_C4 
#>     45      0    152     43    105     42    204     73      4     28    839 
#> URE_C6 URE_C7 URE_C8 
#>     97     42    108
# One table per marker set ("<genotype>_<cluster>") listing for every marker gene its value in
# pseudobulk_percent_expressed for the marker cluster (pct.1) and the mean of these values over
# the other marker sets of the same genotype (pct.average.rest).
Dataset_EV1 <- sapply(names(pseudobulk_markers), function(x) {
  genotype <- gsub("_.*", "", x)
  current <- pseudobulk_markers[[x]]

  first <- as.numeric(pseudobulk_percent_expressed[current, x, drop = TRUE])

  # drop = FALSE keeps the two-dimensional shape even when there is a single marker gene or a single
  # other cluster; otherwise the subset collapses to a vector and apply() fails
  names_second <- setdiff(grep(genotype, names(pseudobulk_markers), value = TRUE), x)
  second <- as.numeric(apply(pseudobulk_percent_expressed[current, names_second, drop = FALSE], 1, mean))

  data.frame(gene = current, pct.1 = first, pct.average.rest = second)
}, simplify = FALSE)

# One sheet per marker set
openxlsx::write.xlsx(x = Dataset_EV1, file = paste0(outdir, "/Dataset_EV1_raw.xlsx"), overwrite = TRUE)

DEGs for URE vs WT

This chunk runs DESeq2 to test for differential expression between URE and WT of the same cluster. DEGs of C1, C2 and C3 are extracted (FC > 1.5, FDR < 0.005), combined and subjected to hclust.

# Fit the DESeq2 model on all retained pseudobulks (the design of dds_pseudobulk is ~group)
dds_pseudobulk <- DESeq2::DESeq(dds_pseudobulk, quiet = TRUE)
dds_pseudobulk$group <- droplevels(dds_pseudobulk$group)
rownames_dds_ure_wt <- rownames(dds_pseudobulk)

# One contrast per cluster C1-C3: c(factor, numerator, denominator) = URE vs WT of the same cluster
ure_wt_contrasts <- lapply(c("C1", "C2", "C3"), function(x) c("group", paste0("URE_", x), paste0("WT_", x)))

# For every contrast return a data.frame with the test statistics plus the two
# pseudobulk_percent_expressed columns of the compared groups.
# The result is an unnamed list in the order of ure_wt_contrasts.
ure_wt_degs <- lapply(ure_wt_contrasts, function(x) {
  # Independent filtering and Cook's cutoff are switched off; baseMean is put on log2 scale
  tt <- DESeq2::results(
    object = dds_pseudobulk, contrast = x,
    independentFiltering = FALSE, cooksCutoff = FALSE,
    lfcThreshold = 0, altHypothesis = "greaterAbs"
  ) %>%
    data.frame(Gene = rownames(.), .) %>%
    dplyr::mutate(baseMean = log2(baseMean + 1)) %>%
    dplyr::rename(logFC = log2FoldChange)

  # Swap columns 2 and 3 (presumably so that logFC precedes baseMean -- depends on the
  # column order returned by DESeq2::results())
  tt <- tt[, c(1, 3, 2, 4:ncol(tt))]

  # Catch corner case when all counts are zero for all columns but the gene is
  # in the dds object because the other genotype groups expressed it,
  # therefore set all columns to zero and pvalues to 1
  tt[rowSums(is.na(tt)) > 0, 2:ncol(tt)] <-
    tt[rowSums(is.na(tt)) > 0, 2:ncol(tt)] %>%
    dplyr::mutate(logFC = 0, baseMean = 0, lfcSE = 0, stat = 0, pvalue = 1, padj = 1)

  # Postfilter, requiring that at least one group of the contrast has a value greater than <num>
  # in pseudobulk_percent_expressed (i.e. more than <num> percent, not 100*<num>);
  # if not, pvalue and padj are set to 1. Note the strict ">" here versus ">=" in the prefilter.
  num <- 10
  fr <- pseudobulk_percent_expressed[tt$Gene, x[2:3]]
  tt <- cbind(tt, fr)
  tt[rowSums(fr > num) == 0, ]$pvalue <- 1
  tt[rowSums(fr > num) == 0, ]$padj <- 1

  return(tt)
})

Heatmap markers

Lots of custom code for Figure 1B. C4 in WT is excluded because it had only 10 cells.

# list.wt collects everything needed for the WT marker heatmap. Its first element holds the
# canonical markers for the expected celltypes: the gene names are looked up in the row metadata
# of sce and converted to identifiers of the form <gene_id>_<gene_name>.
list.wt <- list(
  markers_wt_selected =
    data.frame(
      gene_name = c(
        "Flt3", "Hlf", "Ly6a", "Hoxa9",
        "Elane", "S100a8", "S100a9",
        "Ly86", "Csf1r", "Irf8",
        "Lmo4", "Prss34", "Pbx1",
        "Fli1", "Pf4", "Gata2", "Gfi1b",
        "Car1", "Cpox"
      )
    ) %>%
      dplyr::left_join(data.frame(rowData(sce)), by = "gene_name") %>%
      with(paste(gene_id, gene_name, sep = "_"))
)

# Marker sets of the WT clusters only
markers_wt <- pseudobulk_markers[grepl("WT_", names(pseudobulk_markers))]

# rlog values of all WT markers in the WT pseudobulks, averaged over the replicates of each
# genotype-cluster group and reshaped to one row per gene (column "Gene") and one column per group
list.wt$markers_wt_unique <-
  rld[unique(unlist(markers_wt)), grepl("WT", colnames(rld))] %>%
  reshape2::melt() %>%
  dplyr::mutate(Var2 = gsub("_rep.*", "", Var2)) %>%
  dplyr::group_by(Var1, Var2) %>%
  dplyr::summarize(mean = mean(value)) %>%
  tidyr::pivot_wider(names_from = "Var2", values_from = "mean") %>%
  dplyr::rename(Gene = Var1)
#> `summarise()` has grouped output by 'Var1'. You can override using the `.groups` argument.
# For every marker gene, keep the cluster in which it is most highly expressed
# (based on the per-cluster average of the pseudobulks computed above).
# Result: long table with columns gene, cluster, expression; a gene with tied maxima keeps
# one row per tied cluster.
# NOTE(review): the row order of this table is reused as the row order of the heatmap matrix
# (rld[as.character(list.wt$markers_wt_unique_max$gene), ...]), so do not reorder it here.
list.wt$markers_wt_unique_max <-
  list.wt$markers_wt_unique %>%
  reshape2::melt() %>%
  magrittr::set_colnames(c("gene", "cluster", "expression")) %>%
  dplyr::group_by(gene) %>%
  dplyr::filter(expression == max(expression))
#> Using Gene as id variables
# Base text size of the heatmap; converted with the project helper gg2gp() wherever a
# fontsize is needed (presumably ggplot2 text size -> grid fontsize -- TODO confirm)
hm_text <- 5

# Z score for each marker based on the rlog counts, trimmed to 1/99th percentile
# to avoid large color scale:
# (rows follow the order of list.wt$markers_wt_unique_max$gene, columns are the WT pseudobulks)
list.wt$markers_wt_z <- scale_by_quantile(
  rowScale(rld[as.character(list.wt$markers_wt_unique_max$gene), grep("WT", colnames(rld))]),
  .01, .99
)

# This code decides whether the selected canonical markers are labelled on the left or on the
# right of the heatmap: markers whose highest-expressing cluster matches C1|C5|C7 go to the left,
# those matching C3|C6|C8 to the right. Selected markers that are in neither set are not labelled.
# Note that on_left_wt / on_right_wt first hold the gene identifiers and are then overwritten
# by the corresponding row annotation.
list.wt$on_left_wt <-
  list.wt$markers_wt_selected[list.wt$markers_wt_selected %in%
    list.wt$markers_wt_unique_max[grep("C1|C5|C7", list.wt$markers_wt_unique_max$cluster), ]$gene]

# Labels show only the part of the identifier after the last underscore (the gene name)
list.wt$on_left_wt <-
  rowAnnotation(foo = anno_mark(
    at = match(list.wt$on_left_wt, rownames(list.wt$markers_wt_z)),
    labels = gsub(".*_", "", list.wt$on_left_wt), side = "left",
    labels_gp = gpar(fontsize = gg2gp(hm_text * .75))
  ))

list.wt$on_right_wt <-
  list.wt$markers_wt_selected[list.wt$markers_wt_selected %in%
    list.wt$markers_wt_unique_max[grep(
      "C3|C6|C8",
      list.wt$markers_wt_unique_max$cluster
    ), ]$gene]
list.wt$on_right_wt <-
  rowAnnotation(foo = anno_mark(
    at = match(list.wt$on_right_wt, rownames(list.wt$markers_wt_z)),
    labels = gsub(".*_", "", list.wt$on_right_wt), side = "right",
    labels_gp = gpar(fontsize = gg2gp(hm_text * .75))
  ))

# Cluster label per column: second "_"-separated field of the pseudobulk name
list.wt$wt.lab <- sapply(strsplit(colnames(list.wt$markers_wt_z), split = "_"), function(x) x[2])

# Color scale anchored at 0 (white), spanning the integer-rounded range of the Z scores
list.wt$wt.colorramp <-
  circlize::colorRamp2(
    c(floor(min(list.wt$markers_wt_z)), 0, ceiling(max(list.wt$markers_wt_z))),
    c("darkblue", "white", "darkred")
  )

# Per-cluster colors for the top annotation. Within this chunk they are only referenced by the
# commented-out gp argument of anno_block() below.
ssplit <- stringr::str_split_fixed(colnames(list.wt$markers_wt_z), "_", 3)
list.wt$wt_colors_topannot <- list.ggplot$colorblind_cols[as.numeric(gsub(
  "C",
  "",
  unique(ssplit[, 2])
))]

# Turn off messages emitted by the heatmap package
ht_opt$message <- FALSE

# Now finally the heatmap
# Rows and columns are not clustered: the row order is the one of the input matrix and the columns
# are split by cluster label, with a labelled block per cluster on top.
hm_wt_bulk <- Heatmap(
  matrix = list.wt$markers_wt_z,
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  name = "relative expression",
  show_column_names = FALSE,
  show_row_names = FALSE,
  col = list.wt$wt.colorramp,
  column_split = list.wt$wt.lab,
  column_title = NULL,
  left_annotation = list.wt$on_left_wt,
  right_annotation = list.wt$on_right_wt,
  top_annotation = HeatmapAnnotation(foo = anno_block( # gp=gpar(fill=list.wt$wt_colors_topannot),
    labels = unique(list.wt$wt.lab),
    labels_gp = gpar(col = "black", fontsize = gg2gp(hm_text))
  )),
  heatmap_legend_param = list(
    legend_direction = "horizontal",
    legend_width = unit(4, "cm"),
    legend_position = "bottom",
    labels_gp = gpar(fontsize = gg2gp(hm_text)),
    color_bar = "continuous",
    title_position = "topcenter",
    title_gp = gpar(fontsize = gg2gp(hm_text))
  )
)


# draw() is run on a null PDF device that is closed right afterwards, so creating the drawn
# object does not leave an open device behind; the stored object is then printed
pdf(NULL)
Figure_1B <- draw(hm_wt_bulk, heatmap_legend_side = "bottom")
invisible(dev.off())
Figure_1B

PU.1 & Runx1 & Flt3 expression levels

# Per-cell logcounts of Spi1 (the gene encoding PU.1) together with cluster and genotype.
# The first row name ending in "Spi1" is used.
pu1_levels_singlecell <- data.frame(
  logcounts = logcounts(sce[grep("Spi1$", rownames(sce), value = TRUE), ])[1, ],
  cluster = sce$cluster,
  genotype = sce$genotype
)
pu1_levels_singlecell$cg <- paste(
  pu1_levels_singlecell$genotype, pu1_levels_singlecell$cluster,
  sep = "_"
)

# Boxplots of PU.1 expression per genotype, one panel per cluster. Values above the 99.5th
# percentile (over all cells) are capped at that percentile to keep the y axis compact.
Figure_1C <-
  pu1_levels_singlecell %>%
  dplyr::mutate(logcounts = pmin(logcounts, quantile(logcounts, .995))) %>%
  ggplot(aes(x = genotype, y = logcounts, fill = genotype)) +
  geom_boxplot(outlier.shape = NA) +
  facet_wrap(~cluster, nrow = 1) +
  scale_fill_manual(name = "", values = list.ggplot$genotype_colors) +
  # vertical lines at -Inf/+Inf, i.e. at the panel borders, as separators between the facets
  geom_vline(data = tibble(f = 2, x = c(-1, 1) * Inf), aes(xintercept = x), col = "black") +
  labs(x = "", y = "PU.1 expression [log2]") +
  guides(x = guide_axis(angle = 90)) +
  theme(legend.position = "bottom", legend.justification = "left", panel.spacing = unit(0, "lines")) +
  gg.noX +
  coord_cartesian()

Figure_1C

# Percent residual PU.1 expression in URE relative to WT, per cluster:
# mean expression of the URE cells divided by the mean expression of the WT cells
Appendix_Figure_S1H <-
  pu1_levels_singlecell %>%
  # logcounts back to the linear scale (log2 with a prior of 1)
  dplyr::mutate(counts = 2^logcounts - 1) %>%
  dplyr::group_by(cluster, genotype) %>%
  dplyr::summarize(values = mean(counts), .groups = "drop") %>%
  dplyr::group_by(cluster) %>%
  dplyr::summarise(
    percent_PU1_left_in_URE = round(100 * values[genotype == "URE"] / values[genotype == "WT"], 2)
  ) %>%
  knitr::kable(.)

Appendix_Figure_S1H
cluster percent_PU1_left_in_URE
C1 2.33
C2 9.65
C3 20.00
C4 23.99
C5 28.45
C6 24.34
C7 10.21
C8 11.31
# Boxplots of Flt3 expression per genotype, one panel per cluster. The first row name ending in
# "Flt3" is used; values above the 99.5th percentile (over all cells) are capped at that percentile.
Appendix_Figure_S4B <-
  data.frame(
    logcounts = logcounts(sce[grep("Flt3$", rownames(sce), value = TRUE), ])[1, ],
    cluster = sce$cluster,
    genotype = sce$genotype
  ) %>%
  dplyr::mutate(
    cg = paste(genotype, cluster, sep = "_"),
    logcounts = pmin(logcounts, quantile(logcounts, .995))
  ) %>%
  ggplot(aes(x = genotype, y = logcounts, fill = genotype)) +
  geom_boxplot(outlier.shape = NA) +
  facet_wrap(~cluster, nrow = 1) +
  scale_fill_manual(name = "", values = list.ggplot$genotype_colors) +
  # vertical lines at -Inf/+Inf, i.e. at the panel borders, as separators between the facets
  geom_vline(data = tibble(f = 2, x = c(-1, 1) * Inf), aes(xintercept = x), col = "black") +
  labs(x = "", y = "Flt3 expression [log2]") +
  guides(x = guide_axis(angle = 90)) +
  theme(legend.position = "bottom", legend.justification = "left", panel.spacing = unit(0, "lines")) +
  gg.noX +
  coord_cartesian()

Appendix_Figure_S4B

Cell numbers per cluster

# Cells per genotype and cluster, raw (cellnumber) and scaled so that every genotype sums to
# 10,000 cells before rounding up (cellnumber_normalized)
normalized_cell_counts <-
  as.data.frame(table(sce$genotype, sce$cluster)) %>%
  magrittr::set_colnames(c("genotype", "cluster", "cellnumber")) %>%
  dplyr::group_by(genotype) %>%
  dplyr::mutate(cellnumber_normalized = ceiling(10000 * cellnumber / sum(cellnumber))) %>%
  as.data.frame()

# Cell numbers normalized to 10k cells per genotype.
# The y aesthetic uses the normalized column so that the plot matches its axis label.
Figure_1D <-
  normalized_cell_counts %>%
  ggplot(aes(x = cluster, y = cellnumber_normalized, fill = genotype)) +
  geom_bar(stat = "identity", position = "dodge") +
  ylab("norm. cell counts") +
  scale_fill_manual(values = list.ggplot$genotype_colors) +
  xlab(element_blank()) +
  theme(legend.position = "top", legend.justification = "left", legend.title = element_blank())

Figure_1D

Origin of lineage bias

We score C1 and C2 cells against the neutrophil (C3) and monocyte (C5) markers to detect when the PU.1-induced lineage bias begins.

# Reference for the scoring: WT pseudobulks of C3 and C5, restricted to the WT markers of
# these two clusters
singler_ref_genes <- setNames(pseudobulk_markers[c("WT_C3", "WT_C5")], c("C3", "C5"))
singler_ref_dds <- dds_pseudobulk[
  ,
  dds_pseudobulk$cluster %in% c("C3", "C5") & dds_pseudobulk$genotype == "WT"
]

# Score all C1 and C2 cells against the two reference labels
singler_run1 <- SingleR::SingleR(
  test = sce[, sce$cluster %in% c("C1", "C2")],
  ref = assay(singler_ref_dds, "counts"),
  labels = droplevels(singler_ref_dds$cluster),
  genes = singler_ref_genes
)

# Show general shift towards a neutrophil profile:
# 2D density of the C3 score (x) against the C5 score (y) per genotype, one panel per cluster
Figure_1E <-
  data.frame(cell = rownames(singler_run1), singler_run1$scores) %>%
  dplyr::left_join(data.frame(colData(sce)), by = "cell") %>%
  dplyr::filter(cluster %in% c("C1", "C2")) %>%
  dplyr::mutate(cluster = paste(cluster, "cells")) %>%
  ggplot(aes(x = C3, y = C5, color = genotype)) +
  geom_density_2d() +
  facet_wrap(~cluster, nrow = 1, scales = "free") +
  geom_abline(size = .25, lty = 2) +
  labs(x = "neutrophil lineage score", y = "monocyte lineage score") +
  scale_color_manual(values = list.ggplot$genotype_colors, name = "") +
  theme(legend.position = "top", legend.justification = "left")

Figure_1E

# Everything below belongs to Figure 1F --
# Trajectory analysis: order the cells of clusters C1 to C5 in pseudotime
s.sling <- sce[, sce$cluster %in% paste0("C", 1:5)]

# slingshot runs on the first 49 columns of the "corrected" embedding
reducedDim(s.sling, "corrected_slingshot") <- reducedDim(s.sling, "corrected")[, seq_len(49)]

# Lineages start in C1 and are forced to end in C4 and C5
s.sling <- slingshot::slingshot(
  data = s.sling,
  clusterLabels = droplevels(s.sling$cluster),
  reducedDim = "corrected_slingshot",
  approx_points = 100,
  start.clus = "C1",
  end.clus = c("C4", "C5")
)
#> Using full covariance matrix
# Assign each slingshot trajectory to a lineage: per trajectory and cluster take the 75th
# percentile of the pseudotime (stored in the column called "mean"), keep the cluster with the
# highest value per trajectory and translate the cluster of that endpoint into a lineage name
which_trajectory <-
  data.frame(colData(s.sling)) %>%
  dplyr::select(cluster, genotype, contains("slingPseudotime_")) %>%
  reshape2::melt(
    measure.vars = grep("slingPseudotime_", colnames(.), value = TRUE),
    variable.name = "trajectory",
    value.name = "pseudotime"
  ) %>%
  dplyr::group_by(trajectory, cluster) %>%
  dplyr::summarize(mean = quantile(pseudotime, .75, na.rm = TRUE)) %>%
  dplyr::group_by(trajectory) %>%
  dplyr::slice_max(order_by = mean, n = 1) %>%
  dplyr::mutate(
    cluster = gsub("C6", "basophil", gsub("C5", "monocyte", gsub("C4", "neutrophil", cluster)))
  )
#> `summarise()` has grouped output by 'trajectory'. You can override using the `.groups` argument.
which_trajectory
#> # A tibble: 2 x 3
#> # Groups:   trajectory [2]
#>   trajectory        cluster     mean
#>   <fct>             <chr>      <dbl>
#> 1 slingPseudotime_1 neutrophil 1.28 
#> 2 slingPseudotime_2 monocyte   0.719
# Signatures to plot along the trajectory starting in C1: the WT markers of C3 serve as
# neutrophil and those of C5 as monocyte progenitor signature
signatures_to_score <- list(neutrophil = markers_wt[["WT_C3"]], monocyte = markers_wt[["WT_C5"]])

# Lots of custom code to plot the lineage scores along trajectory because there is unequal numbers of cell,
# so we do binning of adjacent cells to have the same dimension for the x-axis in neutrophil and monocyte progenitors
Figure_1F <-
  lapply(names(signatures_to_score), function(x) {
    current_signature <- signatures_to_score[[x]]
    subset_to <- NULL

    # We subset to C1-C3 first so the loess (below) is not skewed at the C1 border
    if (x %in% c("monocyte")) {
      subset_to <- c("C1", "C2", "C5")
      use_trajectory <- as.character(which_trajectory[which_trajectory$cluster == "monocyte", "trajectory", drop = TRUE])
    }

    if (x %in% c("neutrophil")) {
      subset_to <- c("C1", "C2", "C3")
      use_trajectory <- as.character(which_trajectory[which_trajectory$cluster == "neutrophil", "trajectory", drop = TRUE])
    }

    subset_to <- if (is.null(subset_to)) unique(sce$cluster) else subset_to

    current_signature_found <- intersect(current_signature, rownames(s.sling))
    sx <- s.sling[current_signature_found, s.sling$cluster %in% subset_to]

    # The score is the mean of the mean-subtracted logcounts for the signature genes
    d <- assay(sx, "logcounts")
    d_norm <- d - rowMeans(d)

    # Now calculate the median per cell
    d_score <- as.data.frame(apply(d_norm, 2, mean)) %>%
      rownames_to_column("cell") %>%
      setNames(c("cell", "score"))
    d_cold <- data.frame(colData(sx)) %>% dplyr::select("cell", "cluster", "genotype", all_of(use_trajectory))
    colnames(d_cold)[colnames(d_cold) == all_of(use_trajectory)] <- "pseudotime"
    d_score <- d_score %>% left_join(x = ., y = d_cold, by = "cell")

    # Bin each cluster into 10 chunks and take the median of the per-cell score. This compensates zeros and also
    # deals with uneven cell numbers between genotypes, so in the end we can show equally-sized clusters.
    n_chunks <- 100
    d_data <-
      d_score %>%
      arrange(pseudotime) %>%
      dplyr::group_by(cluster, genotype) %>%
      dplyr::mutate(chunk = rep(1:n_chunks, each = n() / n_chunks, length.out = n())) %>%
      dplyr::group_by(cluster, chunk, genotype) %>%
      dplyr::summarise(score = median(score)) %>%
      group_by(genotype) %>%
      arrange(cluster, genotype) %>%
      mutate(idx = 1:n()) %>%
      group_by(genotype) %>%
      mutate(
        signature = x,
        y = predict(loess(score ~ idx, span = .25, family = "symmetric"))
      ) %>%
      filter(cluster %in% c("C1"))

    ymax <- max(d_data$score)
    ymin <- min(d_data$score)
    yrange <- seq(ymin, ymax, length.out = 3)

    p <-
      d_data %>%
      ggplot(aes(x = idx, y = score, color = genotype)) +
      geom_point(size = .2) +
      geom_line(aes(y = y), show.legend = FALSE) +
      guides(color = guide_legend(override.aes = list(size = 3))) +
      xlab("relative pseudotime") +
      ylab("relative expression score") +
      scale_x_continuous(breaks = c(1, n_chunks), labels = c("low", "high")) +
      scale_y_continuous(breaks = yrange, labels = c(-1, 0, 1)) +
      scale_color_manual(values = list.ggplot$genotype_colors, name = "") +
      facet_wrap(~signature)

    if (x == "monocyte") {

      # For aesthetics we remove the yaxis of the monocyte plot.
      # It's a relative score anyway and only the difference per signature between genotype counts so it's ok I think
      p <- p + theme(
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()
      )
    }

    return(p)
  }) %>% patchwork::wrap_plots() + plot_layout(guides = "collect") & theme(legend.position = "top", legend.justification = "left")
#> `summarise()` has grouped output by 'cluster', 'chunk'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'cluster', 'chunk'. You can override using the `.groups` argument.
Figure_1F

Clustering + heatmap DEG signatures

Use the DEGs from the pseudobulk analysis of URE vs WT for clusters C1, C2 and C3 and cluster them hierarchically (hclust). We included C5 in the clustering because we wanted to see how these genes behave in the monocyte progenitors, but excluded C4 because it was not present in WT samples and very small in URE samples.

This is all for figure 2:

# Extract union of DEGs
# Thresholds: adjusted p-value and (absolute, linear-scale) fold change
fdr_cutoff <- 0.005
fc_cutoff <- 1.5

# Per comparison keep the genes passing both thresholds, then pool and de-duplicate
DEGs_URE_WT <- unique(unlist(lapply(ure_wt_degs, function(res) {
  significant <- filter(res, padj < fdr_cutoff, abs(logFC) > log2(fc_cutoff))
  pull(significant, Gene)
})))

# Cluster and plot
hm_text2 <- 4

DEGs_URE_WT.Z <- list()

# Row-scaled expression of the DEGs in the C1, C2, C3 and C5 columns of rld
DEGs_URE_WT.Z$reps <-
  rowScale(rld[DEGs_URE_WT, grep("_C1|_C2|_C3|_C5", colnames(rld))])

# Cluster rows using ward.D2 (and euclidean distance which is the default)
hclust_method <- "ward.D2"
hclust.row <- stats::hclust(dist(DEGs_URE_WT.Z$reps), method = hclust_method)

# trim to 1st and 99th percentile for better color scale
# this is just for the heatmap, the hclust runs on the untrimmed data
DEGs_URE_WT.Z$winsorized <- scale_by_quantile(DEGs_URE_WT.Z$reps, 0.01, 0.99)

# top annotation for the heatmap: column names without the replicate suffix
URE_WT_topann <- gsub("_rep.*", "", colnames(DEGs_URE_WT.Z$reps))

# add some genes to the right
highlighted_genes <- list()

highlighted_genes$signature1 <- c("Flt3", "Il1r1", "Il4ra", "Il6st")
highlighted_genes$signature2 <- c("Spi1", "Csf1r", "Csf2ra", "Ccr2", "Ifngr2")
highlighted_genes$signature3 <- c("Vmp1", "Acss2", "Jak3", "Bcl2", "Notch1", "Jag2")
highlighted_genes$signature4 <- c("S100a8", "S100a9", "Ctsg", "Elane")

# Guard against typos: every highlighted gene must be a known gene name ...
if (!all(as.character(unlist(highlighted_genes)) %in% rowData(sce)$gene_name)) {
  stop("Check highlighted_genes names!")
}

# ... and must be a row of the heatmap, otherwise match() below returns NA and the mark has no position
highlighted_not_in_heatmap <- setdiff(
  as.character(unlist(highlighted_genes)),
  gsub(".*_", "", rownames(DEGs_URE_WT.Z$winsorized))
)
if (length(highlighted_not_in_heatmap) > 0) {
  stop(
    "highlighted_genes that are not rows of the heatmap: ",
    paste(highlighted_not_in_heatmap, collapse = ", ")
  )
}

# Row names are stripped up to the last underscore before matching against the gene symbols.
# NOTE(review): the label size uses hm_text (defined outside this section) while the rest of this
# heatmap uses hm_text2 -- confirm this is intended.
DEGs_URE_WT.Z$on_right <-
  rowAnnotation(foo = anno_mark(
    at = match(unlist(highlighted_genes), gsub(".*_", "", rownames(DEGs_URE_WT.Z$winsorized))),
    labels = gsub(".*_", "", as.character(unlist(highlighted_genes))), side = "right",
    labels_gp = gpar(fontsize = gg2gp(hm_text * .75))
  ))

# We decide to split into four groups based on hclust/cutree:

# Color scale (lightblue-black-yellow) anchored at zero and at the outward-rounded data limits
fig2a_breaks <- c(
  floor(min(DEGs_URE_WT.Z$winsorized)),
  0,
  ceiling(max(DEGs_URE_WT.Z$winsorized))
)
fig2a_col_fun <- circlize::colorRamp2(fig2a_breaks, c("#56B4E9", "#000000", "#F0E442"))

# One grey, labelled block per column group, in order of first appearance
fig2a_groups <- unique(URE_WT_topann)
fig2a_top <- HeatmapAnnotation(
  foo = anno_block(
    gp = gpar(fill = "grey"),
    labels = fig2a_groups,
    labels_gp = gpar(col = "black", fontsize = gg2gp(hm_text2))
  )
)

fig2a_legend <- list(
  legend_direction = "vertical",
  legend_width = unit(6, "cm"),
  labels_gp = gpar(fontsize = gg2gp(hm_text2)),
  color_bar = "continuous",
  title_position = "leftcenter-rot",
  title_gp = gpar(fontsize = gg2gp(hm_text2))
)

Figure_2A <- Heatmap(
  matrix = DEGs_URE_WT.Z$winsorized,
  name = "relative expression",
  col = fig2a_col_fun,
  # rows: precomputed dendrogram, cut into four groups
  cluster_rows = hclust.row,
  row_split = 4,
  row_title = " ",
  # row_title_gp=gpar(fontsize=gg2gp(list.ggplot$textsize)),
  show_row_names = FALSE,
  right_annotation = DEGs_URE_WT.Z$on_right,
  # columns: keep the given order, grouped by the top annotation
  cluster_columns = FALSE,
  column_split = factor(URE_WT_topann, levels = fig2a_groups),
  column_title = NULL,
  show_column_names = FALSE,
  top_annotation = fig2a_top,
  heatmap_legend_param = fig2a_legend
)

# Draw once on a null PDF device and keep the drawn object; row_order() is called on it further down
pdf(NULL)
Figure_2A <- draw(Figure_2A)
invisible(dev.off())
Figure_2A

# The genes per signature: one "signatureN" per row split of the drawn heatmap, plus the
# background of all tested genes
rod_ure_wt <- row_order(Figure_2A)
signatures <- lapply(seq_along(rod_ure_wt), function(x) {
  data.frame(signature = paste0("signature", x), gene = rownames(DEGs_URE_WT.Z$winsorized)[rod_ure_wt[[x]]])
}) %>%
  do.call(rbind, .) %>%
  rbind(., data.frame(signature = "all_genes_tested", gene = rownames_dds_ure_wt)) %>%
  as_tibble()

# Now the pseudotime figure (Figure_2C)
# Named list of gene vectors, one per DEG signature; the background set is not scored
signature_ids <- unique(signatures$signature)
degs_to_score <- lapply(signature_ids, function(x) signatures[signatures$signature == x, "gene", drop = TRUE]) %>%
  setNames(signature_ids)
degs_to_score$all_genes_tested <- NULL

# Figure 2C: one panel per DEG signature, showing the per-genotype signature score along the
# neutrophil trajectory through clusters C1, C2 and C3 (panels stacked in one column).
Figure_2C <-
  lapply(names(degs_to_score), function(x) {
    current_signature <- degs_to_score[[x]]
    subset_to <- c("C1", "C2", "C3")
    use_trajectory <- as.character(which_trajectory[which_trajectory$cluster == "neutrophil", "trajectory", drop = TRUE])
    sx <- s.sling[current_signature, s.sling$cluster %in% subset_to]

    # The score is the mean of the mean-subtracted logcounts for the signature genes
    d <- assay(sx, "logcounts")
    d_norm <- d - rowMeans(d)

    # Now calculate the mean per cell (columns are cells)
    d_score <- as.data.frame(apply(d_norm, 2, mean)) %>%
      rownames_to_column("cell") %>%
      setNames(c("cell", "score"))
    d_cold <- data.frame(colData(sx)) %>% dplyr::select("cell", "cluster", "genotype", all_of(use_trajectory))
    # use_trajectory is a plain string here; all_of() is only meant for selecting contexts such as select()
    colnames(d_cold)[colnames(d_cold) == use_trajectory] <- "pseudotime"
    d_score <- d_score %>% left_join(x = ., y = d_cold, by = "cell")

    # Bin each cluster/genotype into n_chunks chunks of pseudotime-adjacent cells and take the median of the
    # per-cell score. This compensates zeros and also deals with uneven cell numbers between genotypes,
    # so in the end we can show equally-sized clusters.
    n_chunks <- 100
    d_data <-
      d_score %>%
      arrange(pseudotime) %>%
      dplyr::group_by(cluster, genotype) %>%
      dplyr::mutate(chunk = rep(seq_len(n_chunks), each = n() / n_chunks, length.out = n())) %>%
      dplyr::group_by(cluster, chunk, genotype) %>%
      dplyr::summarise(score = median(score)) %>%
      group_by(genotype) %>%
      arrange(cluster, genotype) %>%
      # Running index per genotype over all bins (cluster by cluster); this is the x-axis
      mutate(idx = seq_len(n())) %>%
      # Still grouped by genotype, so one loess fit per genotype over all three clusters
      mutate(
        signature = x,
        y = predict(loess(score ~ idx, span = .25, family = "symmetric"))
      )

    # The y-axis is labelled -1/0/1 at the minimum/midpoint/maximum of the binned scores (relative units)
    ymax <- max(d_data$score)
    ymin <- min(d_data$score)
    yrange <- seq(ymin, ymax, length.out = 3)

    # Positions of the dashed vertical lines, one every n_chunks bins starting at 1
    seq1 <- seq(1, n_chunks * 4, n_chunks)

    p <-
      d_data %>%
      ggplot(aes(x = idx, y = score, color = genotype)) +
      geom_point(size = .2) +
      geom_line(aes(y = y), show.legend = FALSE) +
      guides(color = guide_legend(override.aes = list(size = 3))) +
      xlab("relative pseudotime") +
      # Axis ticks sit in the middle of each block of n_chunks bins and carry the cluster name
      scale_x_continuous(breaks = c(50, 150, 250), labels = c("C1", "C2", "C3")) +
      scale_y_continuous(breaks = yrange, labels = c(-1, 0, 1)) +
      scale_color_manual(values = list.ggplot$genotype_colors, name = "") +
      facet_wrap(~signature) +
      # No y-axis title in the stacked panels
      ylab("") +
      geom_vline(xintercept = c(seq1), lty = 2, lwd = .5) +
      guides(x = guide_axis(angle = 0))

    if (x %in% c("signature1", "signature2", "signature3")) {

      # For aesthetics we remove the x-axis of all panels except signature4,
      # as the panels are stacked in a single column and share the same x-axis.
      p <- p + theme(
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()
      )
    }

    return(p)
  }) %>% patchwork::wrap_plots(ncol = 1) + plot_layout(guides = "collect") & theme(legend.position = "none")
#> `summarise()` has grouped output by 'cluster', 'chunk'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'cluster', 'chunk'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'cluster', 'chunk'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'cluster', 'chunk'. You can override using the `.groups` argument.
Figure_2C

# This documents how the enrichment analysis that went into dataset EV2 was done. It is deliberately
# never run here (the condition is always FALSE) because the legacy (archived) versions of gprofiler
# sometimes fail with an error 500 and break the rendering.
if (FALSE) {
  gprofiler2::set_base_url("http://biit.cs.ut.ee/gprofiler_archive3/e105_eg52_p16")

  lvl_signature <- paste0("signature", 1:4)

  # Background: all tested genes, with everything up to the last underscore stripped from the name
  background_genes <- unique(gsub(".*_", "", signatures[signatures$signature == "all_genes_tested", "gene", drop = TRUE]))

  # One enrichment result per signature against that background
  signature_enrichments <- lapply(lvl_signature, function(x) {
    query_genes <- gsub(".*_", "", signatures[signatures$signature == x, "gene", drop = TRUE])
    run_gost(unique(query_genes), background_genes)
  })
  names(signature_enrichments) <- paste0("enrichments_", lvl_signature)

  signature_genes <- dplyr::filter(as.data.frame(signatures), signature %in% paste0("signature", 1:4))

  Dataset_EV2 <- c(list(genes = signature_genes), signature_enrichments)

  openxlsx::write.xlsx(x = Dataset_EV2, file = paste0(outdir, "/Dataset_EV2_raw.xlsx"), overwrite = TRUE)
}

# We only write the gene signatures to EV2 as this is needed in downstream scripts:
signature_genes <- dplyr::filter(as.data.frame(signatures), signature %in% paste0("signature", 1:4))
Dataset_EV2 <- list(genes = signature_genes)
openxlsx::write.xlsx(x = Dataset_EV2, file = paste0(outdir, "/Dataset_EV2_raw.xlsx"), overwrite = TRUE)

The chunk below is not evaluated; it only documents how the processed files for the scRNA-seq GEO submission were made:

# Save the final sce as processed data for GEO submission
supplement_singlecell <- paste0(outdir, "/lists/scrnaseq/")
# recursive = TRUE also creates <outdir>/lists/ (where the files below are written) if it is missing;
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
# NOTE(review): the files below go to <outdir>/lists/, not into supplement_singlecell -- confirm intended.
dir.create(supplement_singlecell, recursive = TRUE, showWarnings = FALSE)

# Count matrices
counts_filtered <- assay(sce, "counts")
logcounts_filtered <- assay(sce, "logcounts")

# Per-cell metadata, with the cell identifier as first column
metadata_filtered <- colData(sce)[, c("barcode", "sample", "genotype", "replicate", "cell", "cluster", "sizeFactor")] %>%
  as.data.frame() %>%
  relocate(cell)

# Per-gene annotation
genes_filtered <- rowData(sce)[, c("gene_id", "gene_name", "gene_type")] %>% as.data.frame()

# Reduced dimensions ("corrected" and "UMAP"), each with the cell identifier as first column
fastmnn_corrected_filtered <- reducedDim(sce, "corrected") %>% as.data.frame()
colnames(fastmnn_corrected_filtered) <- paste0("fastmnn_corrected_dim", seq_len(ncol(fastmnn_corrected_filtered)))
fastmnn_corrected_filtered <- data.frame(cell = rownames(fastmnn_corrected_filtered), fastmnn_corrected_filtered)

umap_filtered <- reducedDim(sce, "UMAP") %>% as.data.frame()
colnames(umap_filtered) <- paste0("UMAP", seq_len(ncol(umap_filtered)))
umap_filtered <- data.frame(cell = rownames(umap_filtered), umap_filtered)

iter <- c("counts_filtered", "logcounts_filtered", "metadata_filtered", "genes_filtered", "fastmnn_corrected_filtered", "umap_filtered")

# Write every object named in iter and gzip the result: objects whose name contains "counts_"
# (counts and logcounts) are written as MatrixMarket files, everything else as tab-separated text.
lapply(iter, function(x) {
  if (grepl("counts_", x)) {
    outfile <- paste0(outdir, "/lists/", x, ".mtx")
    Matrix::writeMM(obj = get(x), file = outfile)
  } else {
    outfile <- paste0(outdir, "/lists/", x, ".txt")
    data.table::fwrite(x = get(x), file = outfile, col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")
  }

  # Quote the path so that spaces or shell metacharacters in outdir cannot break the command
  status <- system(command = paste0("gzip --best --force ", shQuote(outfile)))
  if (status != 0) {
    warning("gzip failed (exit status ", status, ") for ", outfile, call. = FALSE)
  }

  return(NULL)
}) %>% invisible()

Revision Code

Code for Appendix_Figure_S1F, S1G, S1I, S1J and Figure 2B was added during the revision.

# Figure 1A => the WT marker genes
# WT cells outside C4, restricted to the marker genes; unused factor levels are dropped afterwards
w <- sce[rownames(list.wt$markers_wt_z), sce$genotype == "WT" & !(sce$cluster %in% c("C4"))]
colData(w) <- droplevels.data.frame(colData(w))

# Mean relative expression per gene and cluster for the selected markers:
# row-scale the logcounts, clip with scale_by_quantile(.05, .95), melt to long format, average per cluster
wt_rel_longer <-
  assay(w[list.wt$markers_wt_selected, ], "logcounts") %>%
  as.matrix() %>%
  t() %>%
  scale() %>%
  t() %>%
  scale_by_quantile(.05, .95) %>%
  data.frame() %>%
  rownames_to_column("gene") %>%
  reshape2::melt(variable.name = "cell", value.name = "rel_expr") %>%
  left_join(data.frame(colData(w)), by = "cell") %>%
  group_by(gene, cluster) %>%
  summarize(rel_expr = mean(rel_expr))
#> Using gene as id variables
#> `summarise()` has grouped output by 'gene'. You can override using the `.groups` argument.
# Output of get_pexpr() per gene (rows) and cluster (columns), melted to long format
wt_pexpr_longer <- reshape2::melt(
  rownames_to_column(data.frame(get_pexpr(data = assay(w, "logcounts"), group = w$cluster)), "gene"),
  variable.name = "cluster",
  value.name = "percent"
)
#> Using gene as id variables
# Boxplot that summarizes percent of expression of markers per cluster
# For every marker set keep only the rows of its own cluster and its own marker genes
wt_marker_clusters <- gsub("WT_", "", names(markers_wt))
wt_marker_pexpr <- bind_rows(lapply(wt_marker_clusters, function(cl) {
  filter(wt_pexpr_longer, cluster %in% cl & gene %in% markers_wt[[paste0("WT_", cl)]])
}))

Appendix_Figure_S1F <-
  ggplot(wt_marker_pexpr, aes(x = cluster, y = percent)) +
  geom_boxplot() +
  ylim(c(0, 100)) +
  geom_point(position = position_jitter(width = .2, height = 0), size = 1)

Appendix_Figure_S1F

# and a dotplot of the selected marker genes that are printed left and right of the heatmap in 1A
# Both tables are joined on a pasted gene+cluster key
wt_rel_keyed <- wt_rel_longer %>% mutate(joiner = paste0(gene, cluster))

wt_selected_dots <-
  data.frame(gene = list.wt$markers_wt_selected) %>%
  left_join(wt_pexpr_longer, by = "gene") %>%
  mutate(joiner = paste0(gene, cluster)) %>%
  left_join(wt_rel_keyed, by = "joiner") %>%
  rename(cluster = cluster.x, gene = gene.x) %>%
  # Show the name without everything up to the last underscore, in the order of the selected markers
  mutate(gene = factor(gsub(".*_", "", gene), levels = gsub(".*_", "", list.wt$markers_wt_selected)))

# Dot size = percent, dot color = relative expression
Appendix_Figure_S1G <-
  ggplot(wt_selected_dots, aes(x = cluster, y = fct_rev(gene), size = percent, color = rel_expr)) +
  geom_point() +
  scale_color_viridis(name = "rel.expr") +
  ylab("gene")

Appendix_Figure_S1G

# Same for Figure 2A
# Gene -> signature lookup table, without the background set
signatures_list <- split(signatures$gene, signatures$signature)
signatures_list$all_genes_tested <- NULL
signatures_df <- bind_rows(lapply(names(signatures_list), function(sig) {
  mutate(data.frame(gene = signatures_list[[sig]]), signature = sig)
}))

# Signature genes in clusters C1, C2, C3 and C5; C5 cells of the URE genotype are removed
w <- sce[unlist(signatures_list), sce$cluster %in% c("C1", "C2", "C3", "C5")]
w <- w[, !(w$cluster == "C5" & w$genotype == "URE")]
colData(w) <- droplevels.data.frame(colData(w))

# Output of get_pexpr() per gene and "cg" group, in long format with the signature of each gene attached;
# the "cg" label is split at the underscore into genotype and cluster
pexpr_wide <- rownames_to_column(data.frame(get_pexpr(data = assay(w, "logcounts"), group = w$cg)), "gene")
pexpr_longer <-
  reshape2::melt(pexpr_wide, variable.name = "cg", value.name = "percent") %>%
  left_join(signatures_df, by = "gene") %>%
  separate(col = "cg", sep = "_", into = c("genotype", "cluster")) %>%
  mutate(genotype = factor(genotype, levels = c("WT", "URE")))
#> Using gene as id variables
# One panel per signature, boxes per cluster and genotype (outlier points not drawn)
Figure_2B <-
  ggplot(pexpr_longer, aes(x = cluster, y = percent, fill = genotype)) +
  geom_boxplot(outlier.shape = NA, position = position_dodge(preserve = "single")) +
  facet_wrap(~signature, ncol = 1, scales = "free_y") +
  scale_fill_manual(values = list.ggplot$genotype_colors, name = "") +
  ylab("percent expression") +
  theme(legend.position = "none")

Figure_2B

# PU.1-colored UMAP
# UMAP coordinates with fixed column names
um <- setNames(data.frame(reducedDim(sce, "UMAP")), c("UMAP1", "UMAP2"))

# Logcounts of Spi1 (the row "ENSMUSG00000002111_Spi1") per cell, plotted as "PU.1"
spi1_logcounts <- as.numeric(assay(sce["ENSMUSG00000002111_Spi1", ], "logcounts"))

# One UMAP panel per genotype, cells colored by their Spi1 logcounts
Appendix_Figure_S1I <-
  cbind(data.frame(PU.1 = spi1_logcounts), colData(sce), um) %>%
  ggplot(aes(x = UMAP1, y = UMAP2, color = PU.1)) +
  geom_point(size = .1) +
  facet_wrap(~genotype, ncol = 1) +
  scale_color_viridis() +
  coord_fixed()

Appendix_Figure_S1I

# Per reviewer request, we also add a UMAP with density per genotype, so one can appreciate
# the redistribution between monocyte and neutrophil compartments
umap_data <- plot_umap(sce, by = "cluster")$data

# The genotype is derived from the row names: rows whose name contains "WT" are WT, all others URE
umap_is_wt <- grepl("WT", rownames(umap_data))

# Points colored by cluster with 2D density contours on top, one panel per genotype
Appendix_Figure_S1J <-
  mutate(
    umap_data,
    genotype = factor(if_else(umap_is_wt, "WT", "URE"), levels = lvl_genotype)
  ) %>%
  ggplot(aes(x = dim1, y = dim2)) +
  geom_point(aes(color = group), size = .01, alpha = .5) +
  geom_density_2d(color = "black", lwd = .25, contour_var = "ndensity") +
  facet_wrap(~genotype, ncol = 1) +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  xlab("UMAP1") +
  ylab("UMAP2") +
  scale_color_manual(values = list.ggplot$colorblind_cols, name = "cluster")

Appendix_Figure_S1J

# This was a reviewer request that we provided as "reviewer-only", asking whether the PU.1-deficient progenitors show
# evidence of being somewhat lymphoid-biased. We commented it out; feel free to run it if you want.

# library(org.Mm.eg.db)
# bcell <- AnnotationDbi::select(org.Mm.eg.db,
#   keytype = "GOALL", keys = "GO:0030183",
#   columns = "ENSEMBL"
# )[, "ENSEMBL"]
#
# tcell <- AnnotationDbi::select(org.Mm.eg.db,
#   keytype = "GOALL", keys = "GO:0030217",
#   columns = "ENSEMBL"
# )[, "ENSEMBL"]
#
# sx <- sce[, sce$cluster == "C1"]
# l <- list(bcell = bcell, tcell = tcell)
# u <- UCell::StoreRankings_UCell(assay(sx))
# rownames(u) <- gsub("_.*", "", rownames(u))
#
# u_lym <- UCell::ScoreSignatures_UCell(precalc.ranks = u, features = l)
# colnames(u_lym) <- gsub("_UCell", "", colnames(u_lym))
#
# reviewer_only_lymphoid <-
#   data.frame(u_lym) %>%
#   rownames_to_column("genotype") %>%
#   mutate(genotype = gsub("_.*", "", genotype)) %>%
#   reshape2::melt(variable.name = "signature") %>%
#   group_by(signature) %>%
#   mutate(value = value - mean(value), genotype = factor(genotype, c("WT", "URE"))) %>%
#   ggplot(aes(x = signature, y = value, color = genotype)) +
#   geom_boxplot() +
#   scale_color_manual(values = list.ggplot$genotype_colors) +
#   ylab("relative geneset score")
#
# reviewer_only_lymphoid